In [1]:
from google.colab import drive
drive.mount('/content/drive/')

Mounted at /content/drive/


In [2]:
%%capture
!pip install transformers

In [3]:
import torch
import numpy as np
import pandas as pd
import torch.nn as nn

import transformers
from sklearn.metrics import *
from transformers import AdamW
from tqdm.notebook import tqdm
from scipy.special import softmax
from sklearn.metrics import classification_report
from sklearn.model_selection import train_test_split as tts
from transformers import BertTokenizerFast, BertConfig, BertForSequenceClassification, AutoModel
from torch.utils.data import TensorDataset, DataLoader, RandomSampler, SequentialSampler

In [4]:
# Define the device
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')

#### Download

In [5]:
cd drive/My Drive/Colab Notebooks/experiments

/content/drive/My Drive/Colab Notebooks/experiments


In [6]:
# Download humor detection data
# Paper: https://arxiv.org/abs/2004.12765
data = pd.read_csv("data/trofix.csv")
print("\nThere are", len(data), "sentences")

# Use the standard text/label columns
# Create labels: 1 --> humorous, 0 --> not humorous
data["label"] = data["label"].apply(int)
data.head()


There are 1444 sentences


Unnamed: 0,arg1,arg2,verb,sentence,verb_stem,label
0,mileage,struck,blow,Triple mileage has struck another blow to the ...,strike,1
1,terrorist,attack,target,U.S. officials said evidence suggests that a J...,attack,0
2,forces,stepped,use,"Some police forces , for example , have steppe...",step,0
3,day,pour,stream,"Every day his troops gather under the green , ...",pour,0
4,manufacturers,rolling,products,He says manufacturers are increasingly rolling...,roll,1


#### Split to training, validation and test



In [7]:
# Use a subset for quick experiments
#subset_data = data[:10000]

# Split to train, val and test
train, test = tts(data[["sentence", "label"]], random_state=42, test_size=0.1)
train, val = tts(train, random_state=42, test_size=test.shape[0])

#### Tokenize and encode with BERT tokenizer

In [8]:
# Construct a BERT tokenizer based on WordPiece
bert_tokenizer = BertTokenizerFast.from_pretrained('bert-base-uncased')

HBox(children=(FloatProgress(value=0.0, description='Downloading', max=231508.0, style=ProgressStyle(descripti…




HBox(children=(FloatProgress(value=0.0, description='Downloading', max=466062.0, style=ProgressStyle(descripti…




HBox(children=(FloatProgress(value=0.0, description='Downloading', max=28.0, style=ProgressStyle(description_w…




In [9]:
# A sanity check of the tokenizer
encoded_instance = bert_tokenizer.batch_encode_plus([train.iloc[0].sentence], padding=True)
print(encoded_instance)

{'input_ids': [[101, 2074, 2004, 26665, 1998, 10093, 22139, 2215, 2000, 6723, 1037, 3538, 1997, 22035, 15312, 1005, 1055, 3700, 2011, 5378, 1057, 1012, 1055, 1012, 1041, 15549, 7368, 16614, 1010, 22035, 15312, 2003, 19285, 2037, 14585, 1998, 14163, 13112, 3971, 2000, 3749, 20874, 2592, 2006, 4964, 1011, 3318, 5693, 1998, 3097, 12731, 14343, 14767, 1012, 102]], 'token_type_ids': [[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]], 'attention_mask': [[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]]}


In [10]:
print("Original text:", train.iloc[0].sentence)
print("BERT BPEs:", bert_tokenizer.convert_ids_to_tokens(encoded_instance["input_ids"][0]))

Original text: Just as Reuters and Telerate want to grab a piece of Quotron 's territory by offering U.S. equities quotes , Quotron is eyeing their turf and mulling ways to offer pricing information on fixed-income instruments and foreign currencies .
BERT BPEs: ['[CLS]', 'just', 'as', 'reuters', 'and', 'tel', '##erate', 'want', 'to', 'grab', 'a', 'piece', 'of', 'quo', '##tron', "'", 's', 'territory', 'by', 'offering', 'u', '.', 's', '.', 'e', '##qui', '##ties', 'quotes', ',', 'quo', '##tron', 'is', 'eyeing', 'their', 'turf', 'and', 'mu', '##lling', 'ways', 'to', 'offer', 'pricing', 'information', 'on', 'fixed', '-', 'income', 'instruments', 'and', 'foreign', 'cu', '##rre', '##ncies', '.', '[SEP]']


In [11]:
# Set max_len to the maximum length of the training data 
max_len = max([len(bert_tokenizer.encode(s)) for s in train.sentence.to_list()])
print("The maximum sentence length in training based on BERT BPEs is", max_len)

The maximum sentence length in training based on BERT BPEs is 105


In [12]:
# Tokenize and encode sentences in each set
x_train = bert_tokenizer.batch_encode_plus(
    train.sentence.tolist(),
    max_length = max_len,
    padding=True,
    truncation=True
)
x_val = bert_tokenizer.batch_encode_plus(
    val.sentence.tolist(),
    max_length = max_len,
    padding=True,
    truncation=True
)
x_test = bert_tokenizer.batch_encode_plus(
    test.sentence.tolist(),
    max_length = max_len,
    padding=True,
    truncation=True
)

In [13]:
# Convert lists to tensors in order to feed them to our PyTorch model
train_seq = torch.tensor(x_train['input_ids'])
train_mask = torch.tensor(x_train['attention_mask'])
train_y = torch.tensor(train.label.tolist())

val_seq = torch.tensor(x_val['input_ids'])
val_mask = torch.tensor(x_val['attention_mask'])
val_y = torch.tensor(val.label.tolist())

test_seq = torch.tensor(x_test['input_ids'])
test_mask = torch.tensor(x_test['attention_mask'])
test_y = torch.tensor(test.label.tolist())

In [14]:
batch_size = 32

# Create a dataloader for each set

# TensorDataset: Creates a PyTorch dataset object to load data from
train_data = TensorDataset(train_seq, train_mask, train_y)
# RandomSampler: specify the sequence of indices/keys used in data loading
train_sampler = RandomSampler(train_data)
# DataLoader: a Python iterable over a dataset
train_dataloader = DataLoader(train_data, sampler=train_sampler, batch_size=batch_size)

val_data = TensorDataset(val_seq, val_mask, val_y)
val_sampler = SequentialSampler(val_data)
val_dataloader = DataLoader(val_data, sampler=val_sampler, batch_size=batch_size)

test_data = TensorDataset(test_seq, test_mask, test_y)
test_sampler = SequentialSampler(test_data)
test_dataloader = DataLoader(test_data, sampler=test_sampler, batch_size=1)

## Build and train the model

In [15]:
# Define which BERT model to use
# We will use BERT base pre-trained on uncased text
model_name = "bert-base-uncased"
# The BertForSequenceClassification class creates a model with BERT and a classifier on top
# The classifier is a linear layer with two outputs (two is the default, if you have more labels change the config)
# It uses the CrossEntropyLoss from PyTorch
# from_pretrained() is used to load pre-trained weights
model = BertForSequenceClassification.from_pretrained(model_name, output_attentions=True)

HBox(children=(FloatProgress(value=0.0, description='Downloading', max=570.0, style=ProgressStyle(description_…




HBox(children=(FloatProgress(value=0.0, description='Downloading', max=440473133.0, style=ProgressStyle(descri…




Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.transform.LayerNorm.bias', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.seq_relationship.weight', 'cls.predictions.transform.dense.weight', 'cls.predictions.bias']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at

In [16]:
# Training method
def training():
  # Set to train mode
  model.train()
  total_loss, total_accuracy = 0, 0
  # Iterate through the training batches
  for batch in tqdm(train_dataloader, desc="Iteration"):    
    # Push the batch to gpu
    batch = [r.to(device) for r in batch] 
    sent_id, mask, labels = batch
    # Clear gradients 
    model.zero_grad()
    # Get model outputs
    outputs = model(sent_id, attention_mask=mask, labels=labels)
    # Get loss
    loss = outputs.loss
    # Add to the total loss
    total_loss = total_loss + loss
    # Backward pass to calculate the gradients
    loss.backward()
    # Update parameters
    optimizer.step()
  # Compute the training loss of the epoch
  epoch_loss = total_loss / len(train_dataloader)

  return epoch_loss

In [17]:
# Evaluation method
def evaluate():  
  print("\nEvaluating...")  
  # Set to eval mode
  model.eval()
  total_loss, total_accuracy = 0, 0
  predictions, targets = [], []
  # Iterate through the validation batches
  for batch in val_dataloader:
    # Push the batch to gpu
    batch = [t.to(device) for t in batch]
    sent_id, mask, labels = batch
    # Save the gold labels to use them for evaluation
    targets.extend(labels.detach().cpu().numpy())
    # Deactivate autograd
    with torch.no_grad():
      # Get model outputs
      outputs = model(sent_id, attention_mask=mask, labels=labels)
      # Get loss
      loss = outputs.loss
      total_loss = total_loss + loss
      # Apply softmax to the output of the model
      output_probs = softmax(outputs.logits.detach().cpu().numpy(), axis=1)
      # Get the index with the largest probability as the predicted label
      predictions.extend(np.argmax(output_probs, axis=1))
  # Compute the validation loss of the epoch
  epoch_loss = total_loss / len(val_dataloader)

  return epoch_loss, targets, predictions

In [18]:
# Push model to gpu
model = model.to(device)
# Define the optimizer and the learning rate
optimizer = AdamW(model.parameters(), lr = 2e-5)

best_val_loss = float('inf')
best_epoch = -1
train_losses=[]
val_losses=[]
epochs = 5
# Define the number of epochs to wait for early stopping
patience = 3

# Train the model
for epoch in range(epochs):     
  print('\n Epoch {:} / {:}'.format(epoch + 1, epochs))    
  train_loss = training()
  val_loss, val_targets, val_predictions = evaluate()

  train_losses.append(train_loss)
  val_losses.append(val_loss)

  print("\nTraining Loss:", train_loss)
  print("Validation Loss:", val_loss)
  # Calculate the validation F1 score for the current epoch
  f1 = f1_score(val_targets, val_predictions, average="binary")
  print("F1 score:", round(f1, 3))

  # Save the model with the best validation loss
  if val_loss < best_val_loss:
    best_val_loss = val_loss
    best_epoch = epoch
    torch.save(model.state_dict(), 'saved_weights.pt')

  # Early stopping
  if ((epoch - best_epoch) >= patience):
    print("No improvement in", patience, "epochs. Stopped training.")
    break



 Epoch 1 / 5


HBox(children=(FloatProgress(value=0.0, description='Iteration', max=37.0, style=ProgressStyle(description_wid…



Evaluating...

Training Loss: tensor(0.6766, device='cuda:0', grad_fn=<DivBackward0>)
Validation Loss: tensor(0.6662, device='cuda:0')
F1 score: 0.408

 Epoch 2 / 5


HBox(children=(FloatProgress(value=0.0, description='Iteration', max=37.0, style=ProgressStyle(description_wid…



Evaluating...

Training Loss: tensor(0.6317, device='cuda:0', grad_fn=<DivBackward0>)
Validation Loss: tensor(0.6213, device='cuda:0')
F1 score: 0.449

 Epoch 3 / 5


HBox(children=(FloatProgress(value=0.0, description='Iteration', max=37.0, style=ProgressStyle(description_wid…



Evaluating...

Training Loss: tensor(0.4814, device='cuda:0', grad_fn=<DivBackward0>)
Validation Loss: tensor(0.6090, device='cuda:0')
F1 score: 0.662

 Epoch 4 / 5


HBox(children=(FloatProgress(value=0.0, description='Iteration', max=37.0, style=ProgressStyle(description_wid…



Evaluating...

Training Loss: tensor(0.2746, device='cuda:0', grad_fn=<DivBackward0>)
Validation Loss: tensor(0.6445, device='cuda:0')
F1 score: 0.656

 Epoch 5 / 5


HBox(children=(FloatProgress(value=0.0, description='Iteration', max=37.0, style=ProgressStyle(description_wid…



Evaluating...

Training Loss: tensor(0.1225, device='cuda:0', grad_fn=<DivBackward0>)
Validation Loss: tensor(0.7679, device='cuda:0')
F1 score: 0.678


In [19]:
# Save checkpoint to your drive
# Zip
#!zip saved_weights.zip  saved_weights.pt
# Mount
#from google.colab import drive
#drive.mount('/content/gdrive')
# Copy to your drive folder
#!cp -r saved_weights.zip /content/gdrive/MyDrive/

## Inference

#### Load the saved checkpoint

In [20]:
# Use this code to download the model saved in your drive 
# Add the id from the shareable link of the file 
# !gdown --id add_shareable_link_id
# !unzip saved_weights.zip

In [21]:
# Create the model
model_e = BertForSequenceClassification.from_pretrained("bert-base-uncased", output_attentions=True)
# Load pre-trained weights
#checkpoint = torch.load("saved_weights.pt", map_location="cpu")
# Add them to the model
#model_e.load_state_dict(checkpoint)
model_e = model_e.to(device)

Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.transform.LayerNorm.bias', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.seq_relationship.weight', 'cls.predictions.transform.dense.weight', 'cls.predictions.bias']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at

#### Get predictions for test

In [22]:
# Predict for the test set and save the results
model_e.eval()
test_predictions = []
test_targets = []
test_attentions = []
test_inputs = []

for batch in test_dataloader:
  batch = [t.to(device) for t in batch]
  sent_id, mask, labels = batch
  # Get gold labels
  test_targets.extend(labels.detach().cpu().numpy())
  # Get input words
  test_inputs.append(bert_tokenizer.convert_ids_to_tokens(sent_id.detach().cpu().numpy()[0]))
  with torch.no_grad():
    # Get predictions
    outputs = model_e(sent_id, attention_mask=mask)
    # Apply softmax to the outputs
    output_probs = softmax(outputs.logits.detach().cpu().numpy(), axis=1)
    # Get the with the highest probability as the predicted label
    test_predictions.extend(np.argmax(output_probs, axis=1))
    # Get attention weights
    # Attention weights from all layers are returned in a tuple
    # The weights from each layer are in a tensor with shape (batch_size, attention_heads, max_len, max_len)
    test_attentions.append(outputs.attentions)

#### Evaluate

In [23]:
print("F1:", f1_score(test_targets, test_predictions, average="binary"))
print("ACC:", accuracy_score(test_targets, test_predictions))
print("AUPR:", average_precision_score(test_targets, test_predictions))
print("PRECISION:", precision_score(test_targets, test_predictions))
print("RECALL:", recall_score(test_targets, test_predictions))
print("AUC:", roc_auc_score(test_targets, test_predictions))

F1: 0.3157894736842105
ACC: 0.5517241379310345
AUPR: 0.4602122015915119
PRECISION: 0.5
RECALL: 0.23076923076923078
AUC: 0.5216346153846154


In [24]:
#max pooling to generate a fixed sized sentence embedding


#Max Pooling - Take the max value over time for every dimension
def max_pooling(model_output, attention_mask):
    token_embeddings = model_output[0] #First element of model_output contains all token embeddings
    input_mask_expanded = attention_mask.resize_(token_embeddings.size())
    #input_mask_expanded = attention_mask.unsqueeze(-1).expand(token_embeddings.size()).float()
    token_embeddings[input_mask_expanded == 0] = -1e9  # Set padding tokens to large negative value
    max_over_time = torch.max(token_embeddings, 1)[0]
    return max_over_time

def avg_pooling(model_output, attention_mask):
    token_embeddings = model_output[0] #First element of model_output contains all token embeddings
    input_mask_expanded = attention_mask.resize_(token_embeddings.size())
    #input_mask_expanded = attention_mask.unsqueeze(-1).expand(token_embeddings.size()).float()
    token_embeddings[input_mask_expanded == 0] = -1e9  # Set padding tokens to large negative value
    avg_over_time = torch.mean(token_embeddings, 1)[0]
    return avg_over_time


#Sentences we want sentence embeddings for
sentences = ['The', 'stars', 'gravitate', 'towards', 'each', 'other.']

#Tokenize sentences
encoded_input = bert_tokenizer(sentences, padding=True, truncation=True, max_length=128, return_tensors='pt')
encoded_input = encoded_input.to(device)

#Compute token embeddings
with torch.no_grad():
    model_output = model_e(**encoded_input)

#Perform pooling. In this case, max pooling
sentence_embeddings = max_pooling(model_output, encoded_input['attention_mask'])
#avg_sentence_embeddings = avg_pooling(model_output, encoded_input['attention_mask'])


print("Sentence embeddings:")
print(sentence_embeddings)

Sentence embeddings:
tensor([-2.1756e-02, -1.8703e-01, -1.8991e-01, -1.1120e-01, -1.0000e+09,
        -4.9664e-02], device='cuda:0')


In [25]:
# numpy implementation of argmax
from numpy import argmax

sentence_embeddings = sentence_embeddings.cpu()

# get argmax
result = argmax(sentence_embeddings)
print('arg max of %s: %d' % (sentence_embeddings, result))

arg max of tensor([-2.1756e-02, -1.8703e-01, -1.8991e-01, -1.1120e-01, -1.0000e+09,
        -4.9664e-02]): 0


In [26]:
#CLS token of each input represents the sentence embedding


#Sentences we want sentence embeddings for
sentences = ['The', 'stars', 'gravitate', 'towards', 'each', 'other']


#Tokenize sentences
encoded_input = bert_tokenizer(sentences, padding=True, truncation=True, max_length=128, return_tensors='pt')
encoded_input = encoded_input.to(device)

#Compute token embeddings
with torch.no_grad():
    model_output = model(**encoded_input)
    #model_output = model_output.to(device)
    
sentence_embeddings = model_output[0][:,0] #Take the first token ([CLS]) from each sentence 

print("Sentence embeddings:")
print(sentence_embeddings)

Sentence embeddings:
tensor([-0.1369, -0.1463,  0.3223, -0.0343,  0.1507,  0.1268], device='cuda:0')


In [27]:
# numpy implementation of argmax
from numpy import argmax

sentence_embeddings = sentence_embeddings.cpu()

# get argmax
result = argmax(sentence_embeddings)
print('arg max of %s: %d' % (sentence_embeddings, result))

arg max of tensor([-0.1369, -0.1463,  0.3223, -0.0343,  0.1507,  0.1268]): 2


## Attention analysis

In [28]:
# Get attention heatmaps
import matplotlib
from IPython.core.display import display, HTML
def colorize(words, color_array):
    cmap=matplotlib.cm.Reds
    template = '<span class="barcode"; style="color: black; background-color: {}">{}</span>'
    colored_string = ''
    for word, color in zip(words, color_array):
        color = matplotlib.colors.rgb2hex(cmap(color)[:3])
        colored_string += template.format(color, '&nbsp' + word + '&nbsp')
    return colored_string

#### What does the CLS token attend to?




In [29]:
# Max Pooling for all tokens in sentences and argmax

# Select some sentences randomly
sent_index = [0,1,2]

for s in sent_index:
  print("*" * 100)
  # Get the sentence's words
  tokens = test_inputs[s]
  # For each layer...
  for l in range(12):
    print("\nLayer", l+1)
    attention = np.squeeze(test_attentions[s][l].detach().cpu().numpy(), axis=0)
    # and for each head
    for h, head in enumerate(attention):
      print("Head", h+1)
      # Get the attention for the cls token
      encoded_tokens = bert_tokenizer(tokens, truncation=True, padding=True, max_length=128, return_tensors='pt')
      encoded_tokens = encoded_tokens.to(device)
      with torch.no_grad():
        model_output1 = model_e(**encoded_tokens)
        tokens_embeddings = max_pooling(model_output1, encoded_tokens['attention_mask'])
        tokens_embeddings = tokens_embeddings.cpu()
        cls_attentions = head[0]
        display(HTML(colorize(tokens, cls_attentions)))
        print("Tokens embeddings:")
        print(tokens_embeddings)
        arg = argmax(tokens_embeddings)
        print('arg max of %s: %d' % (tokens_embeddings, arg))

Output hidden; open in https://colab.research.google.com to view.

In [30]:
# Pooling First token [CLS] for each sentence - argmax

# Select some sentences randomly
sent_index = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10]

for s in sent_index:
  print("*" * 100)
  # Get the sentence's words
  tokens = test_inputs[s]
  # For each layer...
  for l in range(12):
    print("\nLayer", l+1)
    attention = np.squeeze(test_attentions[s][l].detach().cpu().numpy(), axis=0)
    # and for each head
    #for h, head in enumerate(attention):
    #print("Head", h+1)
    # Get the attention for the cls token
    encoded_tokens = bert_tokenizer(tokens, padding=True, truncation=True, max_length=128, return_tensors='pt')
    encoded_tokens = encoded_tokens.to(device)
    with torch.no_grad():
      model_output1 = model_e(**encoded_tokens)
      tokens_embeddings = model_output1[0][:,0]
      tokens_embeddings = tokens_embeddings.cpu()
      cls_attentions = head[0]
      display(HTML(colorize(tokens, cls_attentions)))
      print("Tokens embeddings:")
      print(tokens_embeddings)
      arg = argmax(tokens_embeddings)
      print('arg max of %s: %d' % (tokens_embeddings, arg))

****************************************************************************************************

Layer 1


Tokens embeddings:
tensor([-0.1035, -0.4488, -0.3234, -0.3167, -0.0896, -0.1921, -0.3635, -0.4255,
        -0.0965, -0.3583, -0.3623, -0.3303, -0.4406, -0.0896, -0.3039, -0.3958,
        -0.2996, -0.0040, -0.3234, -0.3929, -0.4187, -0.2571, -0.2364, -0.2672,
        -0.3924, -0.4187, -0.1741, -0.4183, -0.3542,  0.1856, -0.0401, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377])
arg max of tensor([-0.1035, -0.4488, -0.3234, -0.3167, -0.0896, -0.1921, -0.3635, -0.4255,
        -0.0965, -0.3583, -0.3623, -0.3303, -0.4406, -0.0896, -0.3039, -

Tokens embeddings:
tensor([-0.1035, -0.4488, -0.3234, -0.3167, -0.0896, -0.1921, -0.3635, -0.4255,
        -0.0965, -0.3583, -0.3623, -0.3303, -0.4406, -0.0896, -0.3039, -0.3958,
        -0.2996, -0.0040, -0.3234, -0.3929, -0.4187, -0.2571, -0.2364, -0.2672,
        -0.3924, -0.4187, -0.1741, -0.4183, -0.3542,  0.1856, -0.0401, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377])
arg max of tensor([-0.1035, -0.4488, -0.3234, -0.3167, -0.0896, -0.1921, -0.3635, -0.4255,
        -0.0965, -0.3583, -0.3623, -0.3303, -0.4406, -0.0896, -0.3039, -

Tokens embeddings:
tensor([-0.1035, -0.4488, -0.3234, -0.3167, -0.0896, -0.1921, -0.3635, -0.4255,
        -0.0965, -0.3583, -0.3623, -0.3303, -0.4406, -0.0896, -0.3039, -0.3958,
        -0.2996, -0.0040, -0.3234, -0.3929, -0.4187, -0.2571, -0.2364, -0.2672,
        -0.3924, -0.4187, -0.1741, -0.4183, -0.3542,  0.1856, -0.0401, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377])
arg max of tensor([-0.1035, -0.4488, -0.3234, -0.3167, -0.0896, -0.1921, -0.3635, -0.4255,
        -0.0965, -0.3583, -0.3623, -0.3303, -0.4406, -0.0896, -0.3039, -

Tokens embeddings:
tensor([-0.1035, -0.4488, -0.3234, -0.3167, -0.0896, -0.1921, -0.3635, -0.4255,
        -0.0965, -0.3583, -0.3623, -0.3303, -0.4406, -0.0896, -0.3039, -0.3958,
        -0.2996, -0.0040, -0.3234, -0.3929, -0.4187, -0.2571, -0.2364, -0.2672,
        -0.3924, -0.4187, -0.1741, -0.4183, -0.3542,  0.1856, -0.0401, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377])
arg max of tensor([-0.1035, -0.4488, -0.3234, -0.3167, -0.0896, -0.1921, -0.3635, -0.4255,
        -0.0965, -0.3583, -0.3623, -0.3303, -0.4406, -0.0896, -0.3039, -

Tokens embeddings:
tensor([-0.1035, -0.4488, -0.3234, -0.3167, -0.0896, -0.1921, -0.3635, -0.4255,
        -0.0965, -0.3583, -0.3623, -0.3303, -0.4406, -0.0896, -0.3039, -0.3958,
        -0.2996, -0.0040, -0.3234, -0.3929, -0.4187, -0.2571, -0.2364, -0.2672,
        -0.3924, -0.4187, -0.1741, -0.4183, -0.3542,  0.1856, -0.0401, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377])
arg max of tensor([-0.1035, -0.4488, -0.3234, -0.3167, -0.0896, -0.1921, -0.3635, -0.4255,
        -0.0965, -0.3583, -0.3623, -0.3303, -0.4406, -0.0896, -0.3039, -

Tokens embeddings:
tensor([-0.1035, -0.4488, -0.3234, -0.3167, -0.0896, -0.1921, -0.3635, -0.4255,
        -0.0965, -0.3583, -0.3623, -0.3303, -0.4406, -0.0896, -0.3039, -0.3958,
        -0.2996, -0.0040, -0.3234, -0.3929, -0.4187, -0.2571, -0.2364, -0.2672,
        -0.3924, -0.4187, -0.1741, -0.4183, -0.3542,  0.1856, -0.0401, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377])
arg max of tensor([-0.1035, -0.4488, -0.3234, -0.3167, -0.0896, -0.1921, -0.3635, -0.4255,
        -0.0965, -0.3583, -0.3623, -0.3303, -0.4406, -0.0896, -0.3039, -

Tokens embeddings:
tensor([-0.1035, -0.4488, -0.3234, -0.3167, -0.0896, -0.1921, -0.3635, -0.4255,
        -0.0965, -0.3583, -0.3623, -0.3303, -0.4406, -0.0896, -0.3039, -0.3958,
        -0.2996, -0.0040, -0.3234, -0.3929, -0.4187, -0.2571, -0.2364, -0.2672,
        -0.3924, -0.4187, -0.1741, -0.4183, -0.3542,  0.1856, -0.0401, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377])
arg max of tensor([-0.1035, -0.4488, -0.3234, -0.3167, -0.0896, -0.1921, -0.3635, -0.4255,
        -0.0965, -0.3583, -0.3623, -0.3303, -0.4406, -0.0896, -0.3039, -

Tokens embeddings:
tensor([-0.1035, -0.4488, -0.3234, -0.3167, -0.0896, -0.1921, -0.3635, -0.4255,
        -0.0965, -0.3583, -0.3623, -0.3303, -0.4406, -0.0896, -0.3039, -0.3958,
        -0.2996, -0.0040, -0.3234, -0.3929, -0.4187, -0.2571, -0.2364, -0.2672,
        -0.3924, -0.4187, -0.1741, -0.4183, -0.3542,  0.1856, -0.0401, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377])
arg max of tensor([-0.1035, -0.4488, -0.3234, -0.3167, -0.0896, -0.1921, -0.3635, -0.4255,
        -0.0965, -0.3583, -0.3623, -0.3303, -0.4406, -0.0896, -0.3039, -

Tokens embeddings:
tensor([-0.1035, -0.4488, -0.3234, -0.3167, -0.0896, -0.1921, -0.3635, -0.4255,
        -0.0965, -0.3583, -0.3623, -0.3303, -0.4406, -0.0896, -0.3039, -0.3958,
        -0.2996, -0.0040, -0.3234, -0.3929, -0.4187, -0.2571, -0.2364, -0.2672,
        -0.3924, -0.4187, -0.1741, -0.4183, -0.3542,  0.1856, -0.0401, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377])
arg max of tensor([-0.1035, -0.4488, -0.3234, -0.3167, -0.0896, -0.1921, -0.3635, -0.4255,
        -0.0965, -0.3583, -0.3623, -0.3303, -0.4406, -0.0896, -0.3039, -

Tokens embeddings:
tensor([-0.1035, -0.4488, -0.3234, -0.3167, -0.0896, -0.1921, -0.3635, -0.4255,
        -0.0965, -0.3583, -0.3623, -0.3303, -0.4406, -0.0896, -0.3039, -0.3958,
        -0.2996, -0.0040, -0.3234, -0.3929, -0.4187, -0.2571, -0.2364, -0.2672,
        -0.3924, -0.4187, -0.1741, -0.4183, -0.3542,  0.1856, -0.0401, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377])
arg max of tensor([-0.1035, -0.4488, -0.3234, -0.3167, -0.0896, -0.1921, -0.3635, -0.4255,
        -0.0965, -0.3583, -0.3623, -0.3303, -0.4406, -0.0896, -0.3039, -

Tokens embeddings:
tensor([-0.1035, -0.4488, -0.3234, -0.3167, -0.0896, -0.1921, -0.3635, -0.4255,
        -0.0965, -0.3583, -0.3623, -0.3303, -0.4406, -0.0896, -0.3039, -0.3958,
        -0.2996, -0.0040, -0.3234, -0.3929, -0.4187, -0.2571, -0.2364, -0.2672,
        -0.3924, -0.4187, -0.1741, -0.4183, -0.3542,  0.1856, -0.0401, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377])
arg max of tensor([-0.1035, -0.4488, -0.3234, -0.3167, -0.0896, -0.1921, -0.3635, -0.4255,
        -0.0965, -0.3583, -0.3623, -0.3303, -0.4406, -0.0896, -0.3039, -

Tokens embeddings:
tensor([-0.1035, -0.4488, -0.3234, -0.3167, -0.0896, -0.1921, -0.3635, -0.4255,
        -0.0965, -0.3583, -0.3623, -0.3303, -0.4406, -0.0896, -0.3039, -0.3958,
        -0.2996, -0.0040, -0.3234, -0.3929, -0.4187, -0.2571, -0.2364, -0.2672,
        -0.3924, -0.4187, -0.1741, -0.4183, -0.3542,  0.1856, -0.0401, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377])
arg max of tensor([-0.1035, -0.4488, -0.3234, -0.3167, -0.0896, -0.1921, -0.3635, -0.4255,
        -0.0965, -0.3583, -0.3623, -0.3303, -0.4406, -0.0896, -0.3039, -

Tokens embeddings:
tensor([-0.1035, -0.4345,  0.0038, -0.3664, -0.4714, -0.3857, -0.3447, -0.2382,
        -0.0038, -0.2364, -0.3878, -0.3543, -0.2992, -0.4044, -0.3857, -0.2705,
        -0.0802, -0.3335, -0.2664,  0.1856, -0.0401, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377])
arg max of tensor([-0.1035, -0.4345,  0.0038, -0.3664, -0.4714, -0.3857, -0.3447, -0.2382,
        -0.0038, -0.2364, -0.3878, -0.3543, -0.2992, -0.4044, -0.3857, -

Tokens embeddings:
tensor([-0.1035, -0.4345,  0.0038, -0.3664, -0.4714, -0.3857, -0.3447, -0.2382,
        -0.0038, -0.2364, -0.3878, -0.3543, -0.2992, -0.4044, -0.3857, -0.2705,
        -0.0802, -0.3335, -0.2664,  0.1856, -0.0401, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377])
arg max of tensor([-0.1035, -0.4345,  0.0038, -0.3664, -0.4714, -0.3857, -0.3447, -0.2382,
        -0.0038, -0.2364, -0.3878, -0.3543, -0.2992, -0.4044, -0.3857, -

Tokens embeddings:
tensor([-0.1035, -0.4345,  0.0038, -0.3664, -0.4714, -0.3857, -0.3447, -0.2382,
        -0.0038, -0.2364, -0.3878, -0.3543, -0.2992, -0.4044, -0.3857, -0.2705,
        -0.0802, -0.3335, -0.2664,  0.1856, -0.0401, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377])
arg max of tensor([-0.1035, -0.4345,  0.0038, -0.3664, -0.4714, -0.3857, -0.3447, -0.2382,
        -0.0038, -0.2364, -0.3878, -0.3543, -0.2992, -0.4044, -0.3857, -

Tokens embeddings:
tensor([-0.1035, -0.4345,  0.0038, -0.3664, -0.4714, -0.3857, -0.3447, -0.2382,
        -0.0038, -0.2364, -0.3878, -0.3543, -0.2992, -0.4044, -0.3857, -0.2705,
        -0.0802, -0.3335, -0.2664,  0.1856, -0.0401, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377])
arg max of tensor([-0.1035, -0.4345,  0.0038, -0.3664, -0.4714, -0.3857, -0.3447, -0.2382,
        -0.0038, -0.2364, -0.3878, -0.3543, -0.2992, -0.4044, -0.3857, -

Tokens embeddings:
tensor([-0.1035, -0.4345,  0.0038, -0.3664, -0.4714, -0.3857, -0.3447, -0.2382,
        -0.0038, -0.2364, -0.3878, -0.3543, -0.2992, -0.4044, -0.3857, -0.2705,
        -0.0802, -0.3335, -0.2664,  0.1856, -0.0401, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377])
arg max of tensor([-0.1035, -0.4345,  0.0038, -0.3664, -0.4714, -0.3857, -0.3447, -0.2382,
        -0.0038, -0.2364, -0.3878, -0.3543, -0.2992, -0.4044, -0.3857, -

Tokens embeddings:
tensor([-0.1035, -0.4345,  0.0038, -0.3664, -0.4714, -0.3857, -0.3447, -0.2382,
        -0.0038, -0.2364, -0.3878, -0.3543, -0.2992, -0.4044, -0.3857, -0.2705,
        -0.0802, -0.3335, -0.2664,  0.1856, -0.0401, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377])
arg max of tensor([-0.1035, -0.4345,  0.0038, -0.3664, -0.4714, -0.3857, -0.3447, -0.2382,
        -0.0038, -0.2364, -0.3878, -0.3543, -0.2992, -0.4044, -0.3857, -

Tokens embeddings:
tensor([-0.1035, -0.4345,  0.0038, -0.3664, -0.4714, -0.3857, -0.3447, -0.2382,
        -0.0038, -0.2364, -0.3878, -0.3543, -0.2992, -0.4044, -0.3857, -0.2705,
        -0.0802, -0.3335, -0.2664,  0.1856, -0.0401, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377])
arg max of tensor([-0.1035, -0.4345,  0.0038, -0.3664, -0.4714, -0.3857, -0.3447, -0.2382,
        -0.0038, -0.2364, -0.3878, -0.3543, -0.2992, -0.4044, -0.3857, -

Tokens embeddings:
tensor([-0.1035, -0.4345,  0.0038, -0.3664, -0.4714, -0.3857, -0.3447, -0.2382,
        -0.0038, -0.2364, -0.3878, -0.3543, -0.2992, -0.4044, -0.3857, -0.2705,
        -0.0802, -0.3335, -0.2664,  0.1856, -0.0401, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377])
arg max of tensor([-0.1035, -0.4345,  0.0038, -0.3664, -0.4714, -0.3857, -0.3447, -0.2382,
        -0.0038, -0.2364, -0.3878, -0.3543, -0.2992, -0.4044, -0.3857, -

Tokens embeddings:
tensor([-0.1035, -0.4345,  0.0038, -0.3664, -0.4714, -0.3857, -0.3447, -0.2382,
        -0.0038, -0.2364, -0.3878, -0.3543, -0.2992, -0.4044, -0.3857, -0.2705,
        -0.0802, -0.3335, -0.2664,  0.1856, -0.0401, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377])
arg max of tensor([-0.1035, -0.4345,  0.0038, -0.3664, -0.4714, -0.3857, -0.3447, -0.2382,
        -0.0038, -0.2364, -0.3878, -0.3543, -0.2992, -0.4044, -0.3857, -

Tokens embeddings:
tensor([-0.1035, -0.4345,  0.0038, -0.3664, -0.4714, -0.3857, -0.3447, -0.2382,
        -0.0038, -0.2364, -0.3878, -0.3543, -0.2992, -0.4044, -0.3857, -0.2705,
        -0.0802, -0.3335, -0.2664,  0.1856, -0.0401, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377])
arg max of tensor([-0.1035, -0.4345,  0.0038, -0.3664, -0.4714, -0.3857, -0.3447, -0.2382,
        -0.0038, -0.2364, -0.3878, -0.3543, -0.2992, -0.4044, -0.3857, -

Tokens embeddings:
tensor([-0.1035, -0.4345,  0.0038, -0.3664, -0.4714, -0.3857, -0.3447, -0.2382,
        -0.0038, -0.2364, -0.3878, -0.3543, -0.2992, -0.4044, -0.3857, -0.2705,
        -0.0802, -0.3335, -0.2664,  0.1856, -0.0401, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377])
arg max of tensor([-0.1035, -0.4345,  0.0038, -0.3664, -0.4714, -0.3857, -0.3447, -0.2382,
        -0.0038, -0.2364, -0.3878, -0.3543, -0.2992, -0.4044, -0.3857, -

Tokens embeddings:
tensor([-0.1035, -0.4345,  0.0038, -0.3664, -0.4714, -0.3857, -0.3447, -0.2382,
        -0.0038, -0.2364, -0.3878, -0.3543, -0.2992, -0.4044, -0.3857, -0.2705,
        -0.0802, -0.3335, -0.2664,  0.1856, -0.0401, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377])
arg max of tensor([-0.1035, -0.4345,  0.0038, -0.3664, -0.4714, -0.3857, -0.3447, -0.2382,
        -0.0038, -0.2364, -0.3878, -0.3543, -0.2992, -0.4044, -0.3857, -

Tokens embeddings:
tensor([-0.1035, -0.4183, -0.2444, -0.3745, -0.3335, -0.4183, -0.3235, -0.0592,
        -0.4341, -0.3359, -0.0040, -0.3284, -0.4387, -0.0998, -0.0650, -0.2364,
         0.1070, -0.0661, -0.3573, -0.0040, -0.4376, -0.0015, -0.3226,  0.2886,
        -0.1008, -0.4662, -0.2503, -0.2364, -0.1911, -0.1008, -0.1150, -0.1503,
         0.0316, -0.0825,  0.1856, -0.0401, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377])
arg max of tensor([-0.1035, -0.4183, -0.2444, -0.3745, -0.3335, -0.4183, -0.3235, -0.0592,
        -0.4341, -0.3359, -0.0040, -0.3284, -0.4387, -0.0998, -0.0650, -

Tokens embeddings:
tensor([-0.1035, -0.4183, -0.2444, -0.3745, -0.3335, -0.4183, -0.3235, -0.0592,
        -0.4341, -0.3359, -0.0040, -0.3284, -0.4387, -0.0998, -0.0650, -0.2364,
         0.1070, -0.0661, -0.3573, -0.0040, -0.4376, -0.0015, -0.3226,  0.2886,
        -0.1008, -0.4662, -0.2503, -0.2364, -0.1911, -0.1008, -0.1150, -0.1503,
         0.0316, -0.0825,  0.1856, -0.0401, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377])
arg max of tensor([-0.1035, -0.4183, -0.2444, -0.3745, -0.3335, -0.4183, -0.3235, -0.0592,
        -0.4341, -0.3359, -0.0040, -0.3284, -0.4387, -0.0998, -0.0650, -

Tokens embeddings:
tensor([-0.1035, -0.4183, -0.2444, -0.3745, -0.3335, -0.4183, -0.3235, -0.0592,
        -0.4341, -0.3359, -0.0040, -0.3284, -0.4387, -0.0998, -0.0650, -0.2364,
         0.1070, -0.0661, -0.3573, -0.0040, -0.4376, -0.0015, -0.3226,  0.2886,
        -0.1008, -0.4662, -0.2503, -0.2364, -0.1911, -0.1008, -0.1150, -0.1503,
         0.0316, -0.0825,  0.1856, -0.0401, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377])
arg max of tensor([-0.1035, -0.4183, -0.2444, -0.3745, -0.3335, -0.4183, -0.3235, -0.0592,
        -0.4341, -0.3359, -0.0040, -0.3284, -0.4387, -0.0998, -0.0650, -

Tokens embeddings:
tensor([-0.1035, -0.4183, -0.2444, -0.3745, -0.3335, -0.4183, -0.3235, -0.0592,
        -0.4341, -0.3359, -0.0040, -0.3284, -0.4387, -0.0998, -0.0650, -0.2364,
         0.1070, -0.0661, -0.3573, -0.0040, -0.4376, -0.0015, -0.3226,  0.2886,
        -0.1008, -0.4662, -0.2503, -0.2364, -0.1911, -0.1008, -0.1150, -0.1503,
         0.0316, -0.0825,  0.1856, -0.0401, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377])
arg max of tensor([-0.1035, -0.4183, -0.2444, -0.3745, -0.3335, -0.4183, -0.3235, -0.0592,
        -0.4341, -0.3359, -0.0040, -0.3284, -0.4387, -0.0998, -0.0650, -

Tokens embeddings:
tensor([-0.1035, -0.4183, -0.2444, -0.3745, -0.3335, -0.4183, -0.3235, -0.0592,
        -0.4341, -0.3359, -0.0040, -0.3284, -0.4387, -0.0998, -0.0650, -0.2364,
         0.1070, -0.0661, -0.3573, -0.0040, -0.4376, -0.0015, -0.3226,  0.2886,
        -0.1008, -0.4662, -0.2503, -0.2364, -0.1911, -0.1008, -0.1150, -0.1503,
         0.0316, -0.0825,  0.1856, -0.0401, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377])
arg max of tensor([-0.1035, -0.4183, -0.2444, -0.3745, -0.3335, -0.4183, -0.3235, -0.0592,
        -0.4341, -0.3359, -0.0040, -0.3284, -0.4387, -0.0998, -0.0650, -

Tokens embeddings:
tensor([-0.1035, -0.4183, -0.2444, -0.3745, -0.3335, -0.4183, -0.3235, -0.0592,
        -0.4341, -0.3359, -0.0040, -0.3284, -0.4387, -0.0998, -0.0650, -0.2364,
         0.1070, -0.0661, -0.3573, -0.0040, -0.4376, -0.0015, -0.3226,  0.2886,
        -0.1008, -0.4662, -0.2503, -0.2364, -0.1911, -0.1008, -0.1150, -0.1503,
         0.0316, -0.0825,  0.1856, -0.0401, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377])
arg max of tensor([-0.1035, -0.4183, -0.2444, -0.3745, -0.3335, -0.4183, -0.3235, -0.0592,
        -0.4341, -0.3359, -0.0040, -0.3284, -0.4387, -0.0998, -0.0650, -

Tokens embeddings:
tensor([-0.1035, -0.4183, -0.2444, -0.3745, -0.3335, -0.4183, -0.3235, -0.0592,
        -0.4341, -0.3359, -0.0040, -0.3284, -0.4387, -0.0998, -0.0650, -0.2364,
         0.1070, -0.0661, -0.3573, -0.0040, -0.4376, -0.0015, -0.3226,  0.2886,
        -0.1008, -0.4662, -0.2503, -0.2364, -0.1911, -0.1008, -0.1150, -0.1503,
         0.0316, -0.0825,  0.1856, -0.0401, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377])
arg max of tensor([-0.1035, -0.4183, -0.2444, -0.3745, -0.3335, -0.4183, -0.3235, -0.0592,
        -0.4341, -0.3359, -0.0040, -0.3284, -0.4387, -0.0998, -0.0650, -

Tokens embeddings:
tensor([-0.1035, -0.4183, -0.2444, -0.3745, -0.3335, -0.4183, -0.3235, -0.0592,
        -0.4341, -0.3359, -0.0040, -0.3284, -0.4387, -0.0998, -0.0650, -0.2364,
         0.1070, -0.0661, -0.3573, -0.0040, -0.4376, -0.0015, -0.3226,  0.2886,
        -0.1008, -0.4662, -0.2503, -0.2364, -0.1911, -0.1008, -0.1150, -0.1503,
         0.0316, -0.0825,  0.1856, -0.0401, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377])
arg max of tensor([-0.1035, -0.4183, -0.2444, -0.3745, -0.3335, -0.4183, -0.3235, -0.0592,
        -0.4341, -0.3359, -0.0040, -0.3284, -0.4387, -0.0998, -0.0650, -

Tokens embeddings:
tensor([-0.1035, -0.4183, -0.2444, -0.3745, -0.3335, -0.4183, -0.3235, -0.0592,
        -0.4341, -0.3359, -0.0040, -0.3284, -0.4387, -0.0998, -0.0650, -0.2364,
         0.1070, -0.0661, -0.3573, -0.0040, -0.4376, -0.0015, -0.3226,  0.2886,
        -0.1008, -0.4662, -0.2503, -0.2364, -0.1911, -0.1008, -0.1150, -0.1503,
         0.0316, -0.0825,  0.1856, -0.0401, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377])
arg max of tensor([-0.1035, -0.4183, -0.2444, -0.3745, -0.3335, -0.4183, -0.3235, -0.0592,
        -0.4341, -0.3359, -0.0040, -0.3284, -0.4387, -0.0998, -0.0650, -

Tokens embeddings:
tensor([-0.1035, -0.4183, -0.2444, -0.3745, -0.3335, -0.4183, -0.3235, -0.0592,
        -0.4341, -0.3359, -0.0040, -0.3284, -0.4387, -0.0998, -0.0650, -0.2364,
         0.1070, -0.0661, -0.3573, -0.0040, -0.4376, -0.0015, -0.3226,  0.2886,
        -0.1008, -0.4662, -0.2503, -0.2364, -0.1911, -0.1008, -0.1150, -0.1503,
         0.0316, -0.0825,  0.1856, -0.0401, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377])
arg max of tensor([-0.1035, -0.4183, -0.2444, -0.3745, -0.3335, -0.4183, -0.3235, -0.0592,
        -0.4341, -0.3359, -0.0040, -0.3284, -0.4387, -0.0998, -0.0650, -

Tokens embeddings:
tensor([-0.1035, -0.4183, -0.2444, -0.3745, -0.3335, -0.4183, -0.3235, -0.0592,
        -0.4341, -0.3359, -0.0040, -0.3284, -0.4387, -0.0998, -0.0650, -0.2364,
         0.1070, -0.0661, -0.3573, -0.0040, -0.4376, -0.0015, -0.3226,  0.2886,
        -0.1008, -0.4662, -0.2503, -0.2364, -0.1911, -0.1008, -0.1150, -0.1503,
         0.0316, -0.0825,  0.1856, -0.0401, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377])
arg max of tensor([-0.1035, -0.4183, -0.2444, -0.3745, -0.3335, -0.4183, -0.3235, -0.0592,
        -0.4341, -0.3359, -0.0040, -0.3284, -0.4387, -0.0998, -0.0650, -

Tokens embeddings:
tensor([-0.1035, -0.4183, -0.2444, -0.3745, -0.3335, -0.4183, -0.3235, -0.0592,
        -0.4341, -0.3359, -0.0040, -0.3284, -0.4387, -0.0998, -0.0650, -0.2364,
         0.1070, -0.0661, -0.3573, -0.0040, -0.4376, -0.0015, -0.3226,  0.2886,
        -0.1008, -0.4662, -0.2503, -0.2364, -0.1911, -0.1008, -0.1150, -0.1503,
         0.0316, -0.0825,  0.1856, -0.0401, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377])
arg max of tensor([-0.1035, -0.4183, -0.2444, -0.3745, -0.3335, -0.4183, -0.3235, -0.0592,
        -0.4341, -0.3359, -0.0040, -0.3284, -0.4387, -0.0998, -0.0650, -

Tokens embeddings:
tensor([-0.1035, -0.4618, -0.3734, -0.0896, -0.4183, -0.1490, -0.4172, -0.4197,
         0.0915, -0.1595, -0.2193, -0.2316, -0.1741, -0.3264, -0.4183, -0.1554,
        -0.2272, -0.4121, -0.3622, -0.2985, -0.0040, -0.3869, -0.2141, -0.3499,
        -0.0040, -0.0726, -0.3574, -0.4183, -0.2486, -0.3297, -0.4356, -0.2607,
        -0.2364, -0.1008, -0.3574, -0.2786, -0.3198, -0.3428, -0.4371, -0.0774,
        -0.1124,  0.1856, -0.0401, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377])
arg max of tensor([-0.1035, -0.4618, -0.3734, -0.0896, -0.4183, -0.1490, -0.4172, -0.4197,
         0.0915, -0.1595, -0.2193, -0.2316, -0.1741, -0.3264, -0.4183, -

Tokens embeddings:
tensor([-0.1035, -0.4618, -0.3734, -0.0896, -0.4183, -0.1490, -0.4172, -0.4197,
         0.0915, -0.1595, -0.2193, -0.2316, -0.1741, -0.3264, -0.4183, -0.1554,
        -0.2272, -0.4121, -0.3622, -0.2985, -0.0040, -0.3869, -0.2141, -0.3499,
        -0.0040, -0.0726, -0.3574, -0.4183, -0.2486, -0.3297, -0.4356, -0.2607,
        -0.2364, -0.1008, -0.3574, -0.2786, -0.3198, -0.3428, -0.4371, -0.0774,
        -0.1124,  0.1856, -0.0401, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377])
arg max of tensor([-0.1035, -0.4618, -0.3734, -0.0896, -0.4183, -0.1490, -0.4172, -0.4197,
         0.0915, -0.1595, -0.2193, -0.2316, -0.1741, -0.3264, -0.4183, -

Tokens embeddings:
tensor([-0.1035, -0.4618, -0.3734, -0.0896, -0.4183, -0.1490, -0.4172, -0.4197,
         0.0915, -0.1595, -0.2193, -0.2316, -0.1741, -0.3264, -0.4183, -0.1554,
        -0.2272, -0.4121, -0.3622, -0.2985, -0.0040, -0.3869, -0.2141, -0.3499,
        -0.0040, -0.0726, -0.3574, -0.4183, -0.2486, -0.3297, -0.4356, -0.2607,
        -0.2364, -0.1008, -0.3574, -0.2786, -0.3198, -0.3428, -0.4371, -0.0774,
        -0.1124,  0.1856, -0.0401, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377])
arg max of tensor([-0.1035, -0.4618, -0.3734, -0.0896, -0.4183, -0.1490, -0.4172, -0.4197,
         0.0915, -0.1595, -0.2193, -0.2316, -0.1741, -0.3264, -0.4183, -

Tokens embeddings:
tensor([-0.1035, -0.4618, -0.3734, -0.0896, -0.4183, -0.1490, -0.4172, -0.4197,
         0.0915, -0.1595, -0.2193, -0.2316, -0.1741, -0.3264, -0.4183, -0.1554,
        -0.2272, -0.4121, -0.3622, -0.2985, -0.0040, -0.3869, -0.2141, -0.3499,
        -0.0040, -0.0726, -0.3574, -0.4183, -0.2486, -0.3297, -0.4356, -0.2607,
        -0.2364, -0.1008, -0.3574, -0.2786, -0.3198, -0.3428, -0.4371, -0.0774,
        -0.1124,  0.1856, -0.0401, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377])
arg max of tensor([-0.1035, -0.4618, -0.3734, -0.0896, -0.4183, -0.1490, -0.4172, -0.4197,
         0.0915, -0.1595, -0.2193, -0.2316, -0.1741, -0.3264, -0.4183, -

Tokens embeddings:
tensor([-0.1035, -0.4618, -0.3734, -0.0896, -0.4183, -0.1490, -0.4172, -0.4197,
         0.0915, -0.1595, -0.2193, -0.2316, -0.1741, -0.3264, -0.4183, -0.1554,
        -0.2272, -0.4121, -0.3622, -0.2985, -0.0040, -0.3869, -0.2141, -0.3499,
        -0.0040, -0.0726, -0.3574, -0.4183, -0.2486, -0.3297, -0.4356, -0.2607,
        -0.2364, -0.1008, -0.3574, -0.2786, -0.3198, -0.3428, -0.4371, -0.0774,
        -0.1124,  0.1856, -0.0401, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377])
arg max of tensor([-0.1035, -0.4618, -0.3734, -0.0896, -0.4183, -0.1490, -0.4172, -0.4197,
         0.0915, -0.1595, -0.2193, -0.2316, -0.1741, -0.3264, -0.4183, -

Tokens embeddings:
tensor([-0.1035, -0.4618, -0.3734, -0.0896, -0.4183, -0.1490, -0.4172, -0.4197,
         0.0915, -0.1595, -0.2193, -0.2316, -0.1741, -0.3264, -0.4183, -0.1554,
        -0.2272, -0.4121, -0.3622, -0.2985, -0.0040, -0.3869, -0.2141, -0.3499,
        -0.0040, -0.0726, -0.3574, -0.4183, -0.2486, -0.3297, -0.4356, -0.2607,
        -0.2364, -0.1008, -0.3574, -0.2786, -0.3198, -0.3428, -0.4371, -0.0774,
        -0.1124,  0.1856, -0.0401, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377])
arg max of tensor([-0.1035, -0.4618, -0.3734, -0.0896, -0.4183, -0.1490, -0.4172, -0.4197,
         0.0915, -0.1595, -0.2193, -0.2316, -0.1741, -0.3264, -0.4183, -

Tokens embeddings:
tensor([-0.1035, -0.4618, -0.3734, -0.0896, -0.4183, -0.1490, -0.4172, -0.4197,
         0.0915, -0.1595, -0.2193, -0.2316, -0.1741, -0.3264, -0.4183, -0.1554,
        -0.2272, -0.4121, -0.3622, -0.2985, -0.0040, -0.3869, -0.2141, -0.3499,
        -0.0040, -0.0726, -0.3574, -0.4183, -0.2486, -0.3297, -0.4356, -0.2607,
        -0.2364, -0.1008, -0.3574, -0.2786, -0.3198, -0.3428, -0.4371, -0.0774,
        -0.1124,  0.1856, -0.0401, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377])
arg max of tensor([-0.1035, -0.4618, -0.3734, -0.0896, -0.4183, -0.1490, -0.4172, -0.4197,
         0.0915, -0.1595, -0.2193, -0.2316, -0.1741, -0.3264, -0.4183, -

Tokens embeddings:
tensor([-0.1035, -0.4618, -0.3734, -0.0896, -0.4183, -0.1490, -0.4172, -0.4197,
         0.0915, -0.1595, -0.2193, -0.2316, -0.1741, -0.3264, -0.4183, -0.1554,
        -0.2272, -0.4121, -0.3622, -0.2985, -0.0040, -0.3869, -0.2141, -0.3499,
        -0.0040, -0.0726, -0.3574, -0.4183, -0.2486, -0.3297, -0.4356, -0.2607,
        -0.2364, -0.1008, -0.3574, -0.2786, -0.3198, -0.3428, -0.4371, -0.0774,
        -0.1124,  0.1856, -0.0401, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377])
arg max of tensor([-0.1035, -0.4618, -0.3734, -0.0896, -0.4183, -0.1490, -0.4172, -0.4197,
         0.0915, -0.1595, -0.2193, -0.2316, -0.1741, -0.3264, -0.4183, -

Tokens embeddings:
tensor([-0.1035, -0.4618, -0.3734, -0.0896, -0.4183, -0.1490, -0.4172, -0.4197,
         0.0915, -0.1595, -0.2193, -0.2316, -0.1741, -0.3264, -0.4183, -0.1554,
        -0.2272, -0.4121, -0.3622, -0.2985, -0.0040, -0.3869, -0.2141, -0.3499,
        -0.0040, -0.0726, -0.3574, -0.4183, -0.2486, -0.3297, -0.4356, -0.2607,
        -0.2364, -0.1008, -0.3574, -0.2786, -0.3198, -0.3428, -0.4371, -0.0774,
        -0.1124,  0.1856, -0.0401, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377])
arg max of tensor([-0.1035, -0.4618, -0.3734, -0.0896, -0.4183, -0.1490, -0.4172, -0.4197,
         0.0915, -0.1595, -0.2193, -0.2316, -0.1741, -0.3264, -0.4183, -

Tokens embeddings:
tensor([-0.1035, -0.4618, -0.3734, -0.0896, -0.4183, -0.1490, -0.4172, -0.4197,
         0.0915, -0.1595, -0.2193, -0.2316, -0.1741, -0.3264, -0.4183, -0.1554,
        -0.2272, -0.4121, -0.3622, -0.2985, -0.0040, -0.3869, -0.2141, -0.3499,
        -0.0040, -0.0726, -0.3574, -0.4183, -0.2486, -0.3297, -0.4356, -0.2607,
        -0.2364, -0.1008, -0.3574, -0.2786, -0.3198, -0.3428, -0.4371, -0.0774,
        -0.1124,  0.1856, -0.0401, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377])
arg max of tensor([-0.1035, -0.4618, -0.3734, -0.0896, -0.4183, -0.1490, -0.4172, -0.4197,
         0.0915, -0.1595, -0.2193, -0.2316, -0.1741, -0.3264, -0.4183, -

Tokens embeddings:
tensor([-0.1035, -0.4618, -0.3734, -0.0896, -0.4183, -0.1490, -0.4172, -0.4197,
         0.0915, -0.1595, -0.2193, -0.2316, -0.1741, -0.3264, -0.4183, -0.1554,
        -0.2272, -0.4121, -0.3622, -0.2985, -0.0040, -0.3869, -0.2141, -0.3499,
        -0.0040, -0.0726, -0.3574, -0.4183, -0.2486, -0.3297, -0.4356, -0.2607,
        -0.2364, -0.1008, -0.3574, -0.2786, -0.3198, -0.3428, -0.4371, -0.0774,
        -0.1124,  0.1856, -0.0401, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377])
arg max of tensor([-0.1035, -0.4618, -0.3734, -0.0896, -0.4183, -0.1490, -0.4172, -0.4197,
         0.0915, -0.1595, -0.2193, -0.2316, -0.1741, -0.3264, -0.4183, -

Tokens embeddings:
tensor([-0.1035, -0.4618, -0.3734, -0.0896, -0.4183, -0.1490, -0.4172, -0.4197,
         0.0915, -0.1595, -0.2193, -0.2316, -0.1741, -0.3264, -0.4183, -0.1554,
        -0.2272, -0.4121, -0.3622, -0.2985, -0.0040, -0.3869, -0.2141, -0.3499,
        -0.0040, -0.0726, -0.3574, -0.4183, -0.2486, -0.3297, -0.4356, -0.2607,
        -0.2364, -0.1008, -0.3574, -0.2786, -0.3198, -0.3428, -0.4371, -0.0774,
        -0.1124,  0.1856, -0.0401, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377])
arg max of tensor([-0.1035, -0.4618, -0.3734, -0.0896, -0.4183, -0.1490, -0.4172, -0.4197,
         0.0915, -0.1595, -0.2193, -0.2316, -0.1741, -0.3264, -0.4183, -

Tokens embeddings:
tensor([-0.1035, -0.3901, -0.3901, -0.3020, -0.2139, -0.4183, -0.2253, -0.3725,
        -0.3574, -0.4515, -0.4012, -0.3864,  0.1559, -0.3574, -0.4757, -0.4405,
         0.1856, -0.3834, -0.3834, -0.0401, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377])
arg max of tensor([-0.1035, -0.3901, -0.3901, -0.3020, -0.2139, -0.4183, -0.2253, -0.3725,
        -0.3574, -0.4515, -0.4012, -0.3864,  0.1559, -0.3574, -0.4757, -

Tokens embeddings:
tensor([-0.1035, -0.3901, -0.3901, -0.3020, -0.2139, -0.4183, -0.2253, -0.3725,
        -0.3574, -0.4515, -0.4012, -0.3864,  0.1559, -0.3574, -0.4757, -0.4405,
         0.1856, -0.3834, -0.3834, -0.0401, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377])
arg max of tensor([-0.1035, -0.3901, -0.3901, -0.3020, -0.2139, -0.4183, -0.2253, -0.3725,
        -0.3574, -0.4515, -0.4012, -0.3864,  0.1559, -0.3574, -0.4757, -

Tokens embeddings:
tensor([-0.1035, -0.3901, -0.3901, -0.3020, -0.2139, -0.4183, -0.2253, -0.3725,
        -0.3574, -0.4515, -0.4012, -0.3864,  0.1559, -0.3574, -0.4757, -0.4405,
         0.1856, -0.3834, -0.3834, -0.0401, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377])
arg max of tensor([-0.1035, -0.3901, -0.3901, -0.3020, -0.2139, -0.4183, -0.2253, -0.3725,
        -0.3574, -0.4515, -0.4012, -0.3864,  0.1559, -0.3574, -0.4757, -

Tokens embeddings:
tensor([-0.1035, -0.3901, -0.3901, -0.3020, -0.2139, -0.4183, -0.2253, -0.3725,
        -0.3574, -0.4515, -0.4012, -0.3864,  0.1559, -0.3574, -0.4757, -0.4405,
         0.1856, -0.3834, -0.3834, -0.0401, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377])
arg max of tensor([-0.1035, -0.3901, -0.3901, -0.3020, -0.2139, -0.4183, -0.2253, -0.3725,
        -0.3574, -0.4515, -0.4012, -0.3864,  0.1559, -0.3574, -0.4757, -

Tokens embeddings:
tensor([-0.1035, -0.3901, -0.3901, -0.3020, -0.2139, -0.4183, -0.2253, -0.3725,
        -0.3574, -0.4515, -0.4012, -0.3864,  0.1559, -0.3574, -0.4757, -0.4405,
         0.1856, -0.3834, -0.3834, -0.0401, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377])
arg max of tensor([-0.1035, -0.3901, -0.3901, -0.3020, -0.2139, -0.4183, -0.2253, -0.3725,
        -0.3574, -0.4515, -0.4012, -0.3864,  0.1559, -0.3574, -0.4757, -

Tokens embeddings:
tensor([-0.1035, -0.3901, -0.3901, -0.3020, -0.2139, -0.4183, -0.2253, -0.3725,
        -0.3574, -0.4515, -0.4012, -0.3864,  0.1559, -0.3574, -0.4757, -0.4405,
         0.1856, -0.3834, -0.3834, -0.0401, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377])
arg max of tensor([-0.1035, -0.3901, -0.3901, -0.3020, -0.2139, -0.4183, -0.2253, -0.3725,
        -0.3574, -0.4515, -0.4012, -0.3864,  0.1559, -0.3574, -0.4757, -

Tokens embeddings:
tensor([-0.1035, -0.3901, -0.3901, -0.3020, -0.2139, -0.4183, -0.2253, -0.3725,
        -0.3574, -0.4515, -0.4012, -0.3864,  0.1559, -0.3574, -0.4757, -0.4405,
         0.1856, -0.3834, -0.3834, -0.0401, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377])
arg max of tensor([-0.1035, -0.3901, -0.3901, -0.3020, -0.2139, -0.4183, -0.2253, -0.3725,
        -0.3574, -0.4515, -0.4012, -0.3864,  0.1559, -0.3574, -0.4757, -

Tokens embeddings:
tensor([-0.1035, -0.3901, -0.3901, -0.3020, -0.2139, -0.4183, -0.2253, -0.3725,
        -0.3574, -0.4515, -0.4012, -0.3864,  0.1559, -0.3574, -0.4757, -0.4405,
         0.1856, -0.3834, -0.3834, -0.0401, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377])
arg max of tensor([-0.1035, -0.3901, -0.3901, -0.3020, -0.2139, -0.4183, -0.2253, -0.3725,
        -0.3574, -0.4515, -0.4012, -0.3864,  0.1559, -0.3574, -0.4757, -

Tokens embeddings:
tensor([-0.1035, -0.3901, -0.3901, -0.3020, -0.2139, -0.4183, -0.2253, -0.3725,
        -0.3574, -0.4515, -0.4012, -0.3864,  0.1559, -0.3574, -0.4757, -0.4405,
         0.1856, -0.3834, -0.3834, -0.0401, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377])
arg max of tensor([-0.1035, -0.3901, -0.3901, -0.3020, -0.2139, -0.4183, -0.2253, -0.3725,
        -0.3574, -0.4515, -0.4012, -0.3864,  0.1559, -0.3574, -0.4757, -

Tokens embeddings:
tensor([-0.1035, -0.3901, -0.3901, -0.3020, -0.2139, -0.4183, -0.2253, -0.3725,
        -0.3574, -0.4515, -0.4012, -0.3864,  0.1559, -0.3574, -0.4757, -0.4405,
         0.1856, -0.3834, -0.3834, -0.0401, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377])
arg max of tensor([-0.1035, -0.3901, -0.3901, -0.3020, -0.2139, -0.4183, -0.2253, -0.3725,
        -0.3574, -0.4515, -0.4012, -0.3864,  0.1559, -0.3574, -0.4757, -

Tokens embeddings:
tensor([-0.1035, -0.3901, -0.3901, -0.3020, -0.2139, -0.4183, -0.2253, -0.3725,
        -0.3574, -0.4515, -0.4012, -0.3864,  0.1559, -0.3574, -0.4757, -0.4405,
         0.1856, -0.3834, -0.3834, -0.0401, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377])
arg max of tensor([-0.1035, -0.3901, -0.3901, -0.3020, -0.2139, -0.4183, -0.2253, -0.3725,
        -0.3574, -0.4515, -0.4012, -0.3864,  0.1559, -0.3574, -0.4757, -

Tokens embeddings:
tensor([-0.1035, -0.3901, -0.3901, -0.3020, -0.2139, -0.4183, -0.2253, -0.3725,
        -0.3574, -0.4515, -0.4012, -0.3864,  0.1559, -0.3574, -0.4757, -0.4405,
         0.1856, -0.3834, -0.3834, -0.0401, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377])
arg max of tensor([-0.1035, -0.3901, -0.3901, -0.3020, -0.2139, -0.4183, -0.2253, -0.3725,
        -0.3574, -0.4515, -0.4012, -0.3864,  0.1559, -0.3574, -0.4757, -

Tokens embeddings:
tensor([-0.1035, -0.2624, -0.2908, -0.0040, -0.3857, -0.1916,  0.1152, -0.3750,
        -0.4754, -0.3753, -0.4796, -0.4220, -0.2342, -0.2364, -0.2170,  0.1295,
        -0.2342, -0.0040, -0.2892, -0.3224, -0.4196, -0.3335, -0.2486, -0.3137,
        -0.0040, -0.1741, -0.1595, -0.2020, -0.2942, -0.3081, -0.3335, -0.4183,
        -0.1698,  0.1856, -0.3100,  0.1856, -0.0401, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377])
arg max of tensor([-0.1035, -0.2624, -0.2908, -0.0040, -0.3857, -0.1916,  0.1152, -0.3750,
        -0.4754, -0.3753, -0.4796, -0.4220, -0.2342, -0.2364, -0.2170,  

Tokens embeddings:
tensor([-0.1035, -0.2624, -0.2908, -0.0040, -0.3857, -0.1916,  0.1152, -0.3750,
        -0.4754, -0.3753, -0.4796, -0.4220, -0.2342, -0.2364, -0.2170,  0.1295,
        -0.2342, -0.0040, -0.2892, -0.3224, -0.4196, -0.3335, -0.2486, -0.3137,
        -0.0040, -0.1741, -0.1595, -0.2020, -0.2942, -0.3081, -0.3335, -0.4183,
        -0.1698,  0.1856, -0.3100,  0.1856, -0.0401, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377])
arg max of tensor([-0.1035, -0.2624, -0.2908, -0.0040, -0.3857, -0.1916,  0.1152, -0.3750,
        -0.4754, -0.3753, -0.4796, -0.4220, -0.2342, -0.2364, -0.2170,  

Tokens embeddings:
tensor([-0.1035, -0.2624, -0.2908, -0.0040, -0.3857, -0.1916,  0.1152, -0.3750,
        -0.4754, -0.3753, -0.4796, -0.4220, -0.2342, -0.2364, -0.2170,  0.1295,
        -0.2342, -0.0040, -0.2892, -0.3224, -0.4196, -0.3335, -0.2486, -0.3137,
        -0.0040, -0.1741, -0.1595, -0.2020, -0.2942, -0.3081, -0.3335, -0.4183,
        -0.1698,  0.1856, -0.3100,  0.1856, -0.0401, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377])
arg max of tensor([-0.1035, -0.2624, -0.2908, -0.0040, -0.3857, -0.1916,  0.1152, -0.3750,
        -0.4754, -0.3753, -0.4796, -0.4220, -0.2342, -0.2364, -0.2170,  

Tokens embeddings:
tensor([-0.1035, -0.2624, -0.2908, -0.0040, -0.3857, -0.1916,  0.1152, -0.3750,
        -0.4754, -0.3753, -0.4796, -0.4220, -0.2342, -0.2364, -0.2170,  0.1295,
        -0.2342, -0.0040, -0.2892, -0.3224, -0.4196, -0.3335, -0.2486, -0.3137,
        -0.0040, -0.1741, -0.1595, -0.2020, -0.2942, -0.3081, -0.3335, -0.4183,
        -0.1698,  0.1856, -0.3100,  0.1856, -0.0401, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377])
arg max of tensor([-0.1035, -0.2624, -0.2908, -0.0040, -0.3857, -0.1916,  0.1152, -0.3750,
        -0.4754, -0.3753, -0.4796, -0.4220, -0.2342, -0.2364, -0.2170,  

Tokens embeddings:
tensor([-0.1035, -0.2624, -0.2908, -0.0040, -0.3857, -0.1916,  0.1152, -0.3750,
        -0.4754, -0.3753, -0.4796, -0.4220, -0.2342, -0.2364, -0.2170,  0.1295,
        -0.2342, -0.0040, -0.2892, -0.3224, -0.4196, -0.3335, -0.2486, -0.3137,
        -0.0040, -0.1741, -0.1595, -0.2020, -0.2942, -0.3081, -0.3335, -0.4183,
        -0.1698,  0.1856, -0.3100,  0.1856, -0.0401, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377])
arg max of tensor([-0.1035, -0.2624, -0.2908, -0.0040, -0.3857, -0.1916,  0.1152, -0.3750,
        -0.4754, -0.3753, -0.4796, -0.4220, -0.2342, -0.2364, -0.2170,  

Tokens embeddings:
tensor([-0.1035, -0.2624, -0.2908, -0.0040, -0.3857, -0.1916,  0.1152, -0.3750,
        -0.4754, -0.3753, -0.4796, -0.4220, -0.2342, -0.2364, -0.2170,  0.1295,
        -0.2342, -0.0040, -0.2892, -0.3224, -0.4196, -0.3335, -0.2486, -0.3137,
        -0.0040, -0.1741, -0.1595, -0.2020, -0.2942, -0.3081, -0.3335, -0.4183,
        -0.1698,  0.1856, -0.3100,  0.1856, -0.0401, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377])
arg max of tensor([-0.1035, -0.2624, -0.2908, -0.0040, -0.3857, -0.1916,  0.1152, -0.3750,
        -0.4754, -0.3753, -0.4796, -0.4220, -0.2342, -0.2364, -0.2170,  

Tokens embeddings:
tensor([-0.1035, -0.2624, -0.2908, -0.0040, -0.3857, -0.1916,  0.1152, -0.3750,
        -0.4754, -0.3753, -0.4796, -0.4220, -0.2342, -0.2364, -0.2170,  0.1295,
        -0.2342, -0.0040, -0.2892, -0.3224, -0.4196, -0.3335, -0.2486, -0.3137,
        -0.0040, -0.1741, -0.1595, -0.2020, -0.2942, -0.3081, -0.3335, -0.4183,
        -0.1698,  0.1856, -0.3100,  0.1856, -0.0401, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377])
arg max of tensor([-0.1035, -0.2624, -0.2908, -0.0040, -0.3857, -0.1916,  0.1152, -0.3750,
        -0.4754, -0.3753, -0.4796, -0.4220, -0.2342, -0.2364, -0.2170,  

Tokens embeddings:
tensor([-0.1035, -0.2624, -0.2908, -0.0040, -0.3857, -0.1916,  0.1152, -0.3750,
        -0.4754, -0.3753, -0.4796, -0.4220, -0.2342, -0.2364, -0.2170,  0.1295,
        -0.2342, -0.0040, -0.2892, -0.3224, -0.4196, -0.3335, -0.2486, -0.3137,
        -0.0040, -0.1741, -0.1595, -0.2020, -0.2942, -0.3081, -0.3335, -0.4183,
        -0.1698,  0.1856, -0.3100,  0.1856, -0.0401, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377])
arg max of tensor([-0.1035, -0.2624, -0.2908, -0.0040, -0.3857, -0.1916,  0.1152, -0.3750,
        -0.4754, -0.3753, -0.4796, -0.4220, -0.2342, -0.2364, -0.2170,  

Tokens embeddings:
tensor([-0.1035, -0.2624, -0.2908, -0.0040, -0.3857, -0.1916,  0.1152, -0.3750,
        -0.4754, -0.3753, -0.4796, -0.4220, -0.2342, -0.2364, -0.2170,  0.1295,
        -0.2342, -0.0040, -0.2892, -0.3224, -0.4196, -0.3335, -0.2486, -0.3137,
        -0.0040, -0.1741, -0.1595, -0.2020, -0.2942, -0.3081, -0.3335, -0.4183,
        -0.1698,  0.1856, -0.3100,  0.1856, -0.0401, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377])
arg max of tensor([-0.1035, -0.2624, -0.2908, -0.0040, -0.3857, -0.1916,  0.1152, -0.3750,
        -0.4754, -0.3753, -0.4796, -0.4220, -0.2342, -0.2364, -0.2170,  

Tokens embeddings:
tensor([-0.1035, -0.2624, -0.2908, -0.0040, -0.3857, -0.1916,  0.1152, -0.3750,
        -0.4754, -0.3753, -0.4796, -0.4220, -0.2342, -0.2364, -0.2170,  0.1295,
        -0.2342, -0.0040, -0.2892, -0.3224, -0.4196, -0.3335, -0.2486, -0.3137,
        -0.0040, -0.1741, -0.1595, -0.2020, -0.2942, -0.3081, -0.3335, -0.4183,
        -0.1698,  0.1856, -0.3100,  0.1856, -0.0401, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377])
arg max of tensor([-0.1035, -0.2624, -0.2908, -0.0040, -0.3857, -0.1916,  0.1152, -0.3750,
        -0.4754, -0.3753, -0.4796, -0.4220, -0.2342, -0.2364, -0.2170,  

Tokens embeddings:
tensor([-0.1035, -0.2624, -0.2908, -0.0040, -0.3857, -0.1916,  0.1152, -0.3750,
        -0.4754, -0.3753, -0.4796, -0.4220, -0.2342, -0.2364, -0.2170,  0.1295,
        -0.2342, -0.0040, -0.2892, -0.3224, -0.4196, -0.3335, -0.2486, -0.3137,
        -0.0040, -0.1741, -0.1595, -0.2020, -0.2942, -0.3081, -0.3335, -0.4183,
        -0.1698,  0.1856, -0.3100,  0.1856, -0.0401, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377])
arg max of tensor([-0.1035, -0.2624, -0.2908, -0.0040, -0.3857, -0.1916,  0.1152, -0.3750,
        -0.4754, -0.3753, -0.4796, -0.4220, -0.2342, -0.2364, -0.2170,  

Tokens embeddings:
tensor([-0.1035, -0.2624, -0.2908, -0.0040, -0.3857, -0.1916,  0.1152, -0.3750,
        -0.4754, -0.3753, -0.4796, -0.4220, -0.2342, -0.2364, -0.2170,  0.1295,
        -0.2342, -0.0040, -0.2892, -0.3224, -0.4196, -0.3335, -0.2486, -0.3137,
        -0.0040, -0.1741, -0.1595, -0.2020, -0.2942, -0.3081, -0.3335, -0.4183,
        -0.1698,  0.1856, -0.3100,  0.1856, -0.0401, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377])
arg max of tensor([-0.1035, -0.2624, -0.2908, -0.0040, -0.3857, -0.1916,  0.1152, -0.3750,
        -0.4754, -0.3753, -0.4796, -0.4220, -0.2342, -0.2364, -0.2170,  

Tokens embeddings:
tensor([-0.1035, -0.4183, -0.5065, -0.0592, -0.2661, -0.0040, -0.4938, -0.1919,
        -0.1725, -0.3574, -0.4183, -0.0610, -0.2303, -0.3759, -0.2724, -0.3834,
        -0.3100,  0.0622, -0.3588, -0.4129, -0.2831, -0.3070, -0.3924, -0.4238,
        -0.1895, -0.0896, -0.4240, -0.3322, -0.4183, -0.3256, -0.3574, -0.3759,
        -0.2724, -0.3765, -0.5095, -0.3977, -0.3675, -0.3302, -0.3642, -0.2634,
        -0.3924, -0.3009,  0.1856, -0.0401, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377])
arg max of tensor([-0.1035, -0.4183, -0.5065, -0.0592, -0.2661, -0.0040, -0.4938, -0.1919,
        -0.1725, -0.3574, -0.4183, -0.0610, -0.2303, -0.3759, -0.2724, -

Tokens embeddings:
tensor([-0.1035, -0.4183, -0.5065, -0.0592, -0.2661, -0.0040, -0.4938, -0.1919,
        -0.1725, -0.3574, -0.4183, -0.0610, -0.2303, -0.3759, -0.2724, -0.3834,
        -0.3100,  0.0622, -0.3588, -0.4129, -0.2831, -0.3070, -0.3924, -0.4238,
        -0.1895, -0.0896, -0.4240, -0.3322, -0.4183, -0.3256, -0.3574, -0.3759,
        -0.2724, -0.3765, -0.5095, -0.3977, -0.3675, -0.3302, -0.3642, -0.2634,
        -0.3924, -0.3009,  0.1856, -0.0401, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377])
arg max of tensor([-0.1035, -0.4183, -0.5065, -0.0592, -0.2661, -0.0040, -0.4938, -0.1919,
        -0.1725, -0.3574, -0.4183, -0.0610, -0.2303, -0.3759, -0.2724, -

Tokens embeddings:
tensor([-0.1035, -0.4183, -0.5065, -0.0592, -0.2661, -0.0040, -0.4938, -0.1919,
        -0.1725, -0.3574, -0.4183, -0.0610, -0.2303, -0.3759, -0.2724, -0.3834,
        -0.3100,  0.0622, -0.3588, -0.4129, -0.2831, -0.3070, -0.3924, -0.4238,
        -0.1895, -0.0896, -0.4240, -0.3322, -0.4183, -0.3256, -0.3574, -0.3759,
        -0.2724, -0.3765, -0.5095, -0.3977, -0.3675, -0.3302, -0.3642, -0.2634,
        -0.3924, -0.3009,  0.1856, -0.0401, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377])
arg max of tensor([-0.1035, -0.4183, -0.5065, -0.0592, -0.2661, -0.0040, -0.4938, -0.1919,
        -0.1725, -0.3574, -0.4183, -0.0610, -0.2303, -0.3759, -0.2724, -

Tokens embeddings:
tensor([-0.1035, -0.4183, -0.5065, -0.0592, -0.2661, -0.0040, -0.4938, -0.1919,
        -0.1725, -0.3574, -0.4183, -0.0610, -0.2303, -0.3759, -0.2724, -0.3834,
        -0.3100,  0.0622, -0.3588, -0.4129, -0.2831, -0.3070, -0.3924, -0.4238,
        -0.1895, -0.0896, -0.4240, -0.3322, -0.4183, -0.3256, -0.3574, -0.3759,
        -0.2724, -0.3765, -0.5095, -0.3977, -0.3675, -0.3302, -0.3642, -0.2634,
        -0.3924, -0.3009,  0.1856, -0.0401, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377])
arg max of tensor([-0.1035, -0.4183, -0.5065, -0.0592, -0.2661, -0.0040, -0.4938, -0.1919,
        -0.1725, -0.3574, -0.4183, -0.0610, -0.2303, -0.3759, -0.2724, -

Tokens embeddings:
tensor([-0.1035, -0.4183, -0.5065, -0.0592, -0.2661, -0.0040, -0.4938, -0.1919,
        -0.1725, -0.3574, -0.4183, -0.0610, -0.2303, -0.3759, -0.2724, -0.3834,
        -0.3100,  0.0622, -0.3588, -0.4129, -0.2831, -0.3070, -0.3924, -0.4238,
        -0.1895, -0.0896, -0.4240, -0.3322, -0.4183, -0.3256, -0.3574, -0.3759,
        -0.2724, -0.3765, -0.5095, -0.3977, -0.3675, -0.3302, -0.3642, -0.2634,
        -0.3924, -0.3009,  0.1856, -0.0401, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377])
arg max of tensor([-0.1035, -0.4183, -0.5065, -0.0592, -0.2661, -0.0040, -0.4938, -0.1919,
        -0.1725, -0.3574, -0.4183, -0.0610, -0.2303, -0.3759, -0.2724, -

Tokens embeddings:
tensor([-0.1035, -0.4183, -0.5065, -0.0592, -0.2661, -0.0040, -0.4938, -0.1919,
        -0.1725, -0.3574, -0.4183, -0.0610, -0.2303, -0.3759, -0.2724, -0.3834,
        -0.3100,  0.0622, -0.3588, -0.4129, -0.2831, -0.3070, -0.3924, -0.4238,
        -0.1895, -0.0896, -0.4240, -0.3322, -0.4183, -0.3256, -0.3574, -0.3759,
        -0.2724, -0.3765, -0.5095, -0.3977, -0.3675, -0.3302, -0.3642, -0.2634,
        -0.3924, -0.3009,  0.1856, -0.0401, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377])
arg max of tensor([-0.1035, -0.4183, -0.5065, -0.0592, -0.2661, -0.0040, -0.4938, -0.1919,
        -0.1725, -0.3574, -0.4183, -0.0610, -0.2303, -0.3759, -0.2724, -

Tokens embeddings:
tensor([-0.1035, -0.4183, -0.5065, -0.0592, -0.2661, -0.0040, -0.4938, -0.1919,
        -0.1725, -0.3574, -0.4183, -0.0610, -0.2303, -0.3759, -0.2724, -0.3834,
        -0.3100,  0.0622, -0.3588, -0.4129, -0.2831, -0.3070, -0.3924, -0.4238,
        -0.1895, -0.0896, -0.4240, -0.3322, -0.4183, -0.3256, -0.3574, -0.3759,
        -0.2724, -0.3765, -0.5095, -0.3977, -0.3675, -0.3302, -0.3642, -0.2634,
        -0.3924, -0.3009,  0.1856, -0.0401, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377])
arg max of tensor([-0.1035, -0.4183, -0.5065, -0.0592, -0.2661, -0.0040, -0.4938, -0.1919,
        -0.1725, -0.3574, -0.4183, -0.0610, -0.2303, -0.3759, -0.2724, -

Tokens embeddings:
tensor([-0.1035, -0.4183, -0.5065, -0.0592, -0.2661, -0.0040, -0.4938, -0.1919,
        -0.1725, -0.3574, -0.4183, -0.0610, -0.2303, -0.3759, -0.2724, -0.3834,
        -0.3100,  0.0622, -0.3588, -0.4129, -0.2831, -0.3070, -0.3924, -0.4238,
        -0.1895, -0.0896, -0.4240, -0.3322, -0.4183, -0.3256, -0.3574, -0.3759,
        -0.2724, -0.3765, -0.5095, -0.3977, -0.3675, -0.3302, -0.3642, -0.2634,
        -0.3924, -0.3009,  0.1856, -0.0401, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377])
arg max of tensor([-0.1035, -0.4183, -0.5065, -0.0592, -0.2661, -0.0040, -0.4938, -0.1919,
        -0.1725, -0.3574, -0.4183, -0.0610, -0.2303, -0.3759, -0.2724, -

Tokens embeddings:
tensor([-0.1035, -0.4183, -0.5065, -0.0592, -0.2661, -0.0040, -0.4938, -0.1919,
        -0.1725, -0.3574, -0.4183, -0.0610, -0.2303, -0.3759, -0.2724, -0.3834,
        -0.3100,  0.0622, -0.3588, -0.4129, -0.2831, -0.3070, -0.3924, -0.4238,
        -0.1895, -0.0896, -0.4240, -0.3322, -0.4183, -0.3256, -0.3574, -0.3759,
        -0.2724, -0.3765, -0.5095, -0.3977, -0.3675, -0.3302, -0.3642, -0.2634,
        -0.3924, -0.3009,  0.1856, -0.0401, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377])
arg max of tensor([-0.1035, -0.4183, -0.5065, -0.0592, -0.2661, -0.0040, -0.4938, -0.1919,
        -0.1725, -0.3574, -0.4183, -0.0610, -0.2303, -0.3759, -0.2724, -

Tokens embeddings:
tensor([-0.1035, -0.4183, -0.5065, -0.0592, -0.2661, -0.0040, -0.4938, -0.1919,
        -0.1725, -0.3574, -0.4183, -0.0610, -0.2303, -0.3759, -0.2724, -0.3834,
        -0.3100,  0.0622, -0.3588, -0.4129, -0.2831, -0.3070, -0.3924, -0.4238,
        -0.1895, -0.0896, -0.4240, -0.3322, -0.4183, -0.3256, -0.3574, -0.3759,
        -0.2724, -0.3765, -0.5095, -0.3977, -0.3675, -0.3302, -0.3642, -0.2634,
        -0.3924, -0.3009,  0.1856, -0.0401, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377])
arg max of tensor([-0.1035, -0.4183, -0.5065, -0.0592, -0.2661, -0.0040, -0.4938, -0.1919,
        -0.1725, -0.3574, -0.4183, -0.0610, -0.2303, -0.3759, -0.2724, -

Tokens embeddings:
tensor([-0.1035, -0.4183, -0.5065, -0.0592, -0.2661, -0.0040, -0.4938, -0.1919,
        -0.1725, -0.3574, -0.4183, -0.0610, -0.2303, -0.3759, -0.2724, -0.3834,
        -0.3100,  0.0622, -0.3588, -0.4129, -0.2831, -0.3070, -0.3924, -0.4238,
        -0.1895, -0.0896, -0.4240, -0.3322, -0.4183, -0.3256, -0.3574, -0.3759,
        -0.2724, -0.3765, -0.5095, -0.3977, -0.3675, -0.3302, -0.3642, -0.2634,
        -0.3924, -0.3009,  0.1856, -0.0401, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377])
arg max of tensor([-0.1035, -0.4183, -0.5065, -0.0592, -0.2661, -0.0040, -0.4938, -0.1919,
        -0.1725, -0.3574, -0.4183, -0.0610, -0.2303, -0.3759, -0.2724, -

Tokens embeddings:
tensor([-0.1035, -0.4183, -0.5065, -0.0592, -0.2661, -0.0040, -0.4938, -0.1919,
        -0.1725, -0.3574, -0.4183, -0.0610, -0.2303, -0.3759, -0.2724, -0.3834,
        -0.3100,  0.0622, -0.3588, -0.4129, -0.2831, -0.3070, -0.3924, -0.4238,
        -0.1895, -0.0896, -0.4240, -0.3322, -0.4183, -0.3256, -0.3574, -0.3759,
        -0.2724, -0.3765, -0.5095, -0.3977, -0.3675, -0.3302, -0.3642, -0.2634,
        -0.3924, -0.3009,  0.1856, -0.0401, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377])
arg max of tensor([-0.1035, -0.4183, -0.5065, -0.0592, -0.2661, -0.0040, -0.4938, -0.1919,
        -0.1725, -0.3574, -0.4183, -0.0610, -0.2303, -0.3759, -0.2724, -

Tokens embeddings:
tensor([-0.1035, -0.4979, -0.1744, -0.2692, -0.3834, -0.3100, -0.4708, -0.0592,
        -0.3844, -0.3219, -0.2342, -0.0040, -0.1296,  0.1482, -0.1623,  0.1856,
        -0.3750, -0.3887, -0.4260, -0.1296,  0.0363, -0.1916,  0.0876, -0.1623,
         0.1856, -0.0401, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377])
arg max of tensor([-0.1035, -0.4979, -0.1744, -0.2692, -0.3834, -0.3100, -0.4708, -0.0592,
        -0.3844, -0.3219, -0.2342, -0.0040, -0.1296,  0.1482, -0.1623,  

Tokens embeddings:
tensor([-0.1035, -0.4979, -0.1744, -0.2692, -0.3834, -0.3100, -0.4708, -0.0592,
        -0.3844, -0.3219, -0.2342, -0.0040, -0.1296,  0.1482, -0.1623,  0.1856,
        -0.3750, -0.3887, -0.4260, -0.1296,  0.0363, -0.1916,  0.0876, -0.1623,
         0.1856, -0.0401, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377])
arg max of tensor([-0.1035, -0.4979, -0.1744, -0.2692, -0.3834, -0.3100, -0.4708, -0.0592,
        -0.3844, -0.3219, -0.2342, -0.0040, -0.1296,  0.1482, -0.1623,  

Tokens embeddings:
tensor([-0.1035, -0.4979, -0.1744, -0.2692, -0.3834, -0.3100, -0.4708, -0.0592,
        -0.3844, -0.3219, -0.2342, -0.0040, -0.1296,  0.1482, -0.1623,  0.1856,
        -0.3750, -0.3887, -0.4260, -0.1296,  0.0363, -0.1916,  0.0876, -0.1623,
         0.1856, -0.0401, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377])
arg max of tensor([-0.1035, -0.4979, -0.1744, -0.2692, -0.3834, -0.3100, -0.4708, -0.0592,
        -0.3844, -0.3219, -0.2342, -0.0040, -0.1296,  0.1482, -0.1623,  

Tokens embeddings:
tensor([-0.1035, -0.4979, -0.1744, -0.2692, -0.3834, -0.3100, -0.4708, -0.0592,
        -0.3844, -0.3219, -0.2342, -0.0040, -0.1296,  0.1482, -0.1623,  0.1856,
        -0.3750, -0.3887, -0.4260, -0.1296,  0.0363, -0.1916,  0.0876, -0.1623,
         0.1856, -0.0401, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377])
arg max of tensor([-0.1035, -0.4979, -0.1744, -0.2692, -0.3834, -0.3100, -0.4708, -0.0592,
        -0.3844, -0.3219, -0.2342, -0.0040, -0.1296,  0.1482, -0.1623,  

Tokens embeddings:
tensor([-0.1035, -0.4979, -0.1744, -0.2692, -0.3834, -0.3100, -0.4708, -0.0592,
        -0.3844, -0.3219, -0.2342, -0.0040, -0.1296,  0.1482, -0.1623,  0.1856,
        -0.3750, -0.3887, -0.4260, -0.1296,  0.0363, -0.1916,  0.0876, -0.1623,
         0.1856, -0.0401, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377])
arg max of tensor([-0.1035, -0.4979, -0.1744, -0.2692, -0.3834, -0.3100, -0.4708, -0.0592,
        -0.3844, -0.3219, -0.2342, -0.0040, -0.1296,  0.1482, -0.1623,  

Tokens embeddings:
tensor([-0.1035, -0.4979, -0.1744, -0.2692, -0.3834, -0.3100, -0.4708, -0.0592,
        -0.3844, -0.3219, -0.2342, -0.0040, -0.1296,  0.1482, -0.1623,  0.1856,
        -0.3750, -0.3887, -0.4260, -0.1296,  0.0363, -0.1916,  0.0876, -0.1623,
         0.1856, -0.0401, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377])
arg max of tensor([-0.1035, -0.4979, -0.1744, -0.2692, -0.3834, -0.3100, -0.4708, -0.0592,
        -0.3844, -0.3219, -0.2342, -0.0040, -0.1296,  0.1482, -0.1623,  

Tokens embeddings:
tensor([-0.1035, -0.4979, -0.1744, -0.2692, -0.3834, -0.3100, -0.4708, -0.0592,
        -0.3844, -0.3219, -0.2342, -0.0040, -0.1296,  0.1482, -0.1623,  0.1856,
        -0.3750, -0.3887, -0.4260, -0.1296,  0.0363, -0.1916,  0.0876, -0.1623,
         0.1856, -0.0401, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377])
arg max of tensor([-0.1035, -0.4979, -0.1744, -0.2692, -0.3834, -0.3100, -0.4708, -0.0592,
        -0.3844, -0.3219, -0.2342, -0.0040, -0.1296,  0.1482, -0.1623,  

Tokens embeddings:
tensor([-0.1035, -0.4979, -0.1744, -0.2692, -0.3834, -0.3100, -0.4708, -0.0592,
        -0.3844, -0.3219, -0.2342, -0.0040, -0.1296,  0.1482, -0.1623,  0.1856,
        -0.3750, -0.3887, -0.4260, -0.1296,  0.0363, -0.1916,  0.0876, -0.1623,
         0.1856, -0.0401, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377])
arg max of tensor([-0.1035, -0.4979, -0.1744, -0.2692, -0.3834, -0.3100, -0.4708, -0.0592,
        -0.3844, -0.3219, -0.2342, -0.0040, -0.1296,  0.1482, -0.1623,  

Tokens embeddings:
tensor([-0.1035, -0.4979, -0.1744, -0.2692, -0.3834, -0.3100, -0.4708, -0.0592,
        -0.3844, -0.3219, -0.2342, -0.0040, -0.1296,  0.1482, -0.1623,  0.1856,
        -0.3750, -0.3887, -0.4260, -0.1296,  0.0363, -0.1916,  0.0876, -0.1623,
         0.1856, -0.0401, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377])
arg max of tensor([-0.1035, -0.4979, -0.1744, -0.2692, -0.3834, -0.3100, -0.4708, -0.0592,
        -0.3844, -0.3219, -0.2342, -0.0040, -0.1296,  0.1482, -0.1623,  

Tokens embeddings:
tensor([-0.1035, -0.4979, -0.1744, -0.2692, -0.3834, -0.3100, -0.4708, -0.0592,
        -0.3844, -0.3219, -0.2342, -0.0040, -0.1296,  0.1482, -0.1623,  0.1856,
        -0.3750, -0.3887, -0.4260, -0.1296,  0.0363, -0.1916,  0.0876, -0.1623,
         0.1856, -0.0401, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377])
arg max of tensor([-0.1035, -0.4979, -0.1744, -0.2692, -0.3834, -0.3100, -0.4708, -0.0592,
        -0.3844, -0.3219, -0.2342, -0.0040, -0.1296,  0.1482, -0.1623,  

Tokens embeddings:
tensor([-0.1035, -0.4979, -0.1744, -0.2692, -0.3834, -0.3100, -0.4708, -0.0592,
        -0.3844, -0.3219, -0.2342, -0.0040, -0.1296,  0.1482, -0.1623,  0.1856,
        -0.3750, -0.3887, -0.4260, -0.1296,  0.0363, -0.1916,  0.0876, -0.1623,
         0.1856, -0.0401, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377])
arg max of tensor([-0.1035, -0.4979, -0.1744, -0.2692, -0.3834, -0.3100, -0.4708, -0.0592,
        -0.3844, -0.3219, -0.2342, -0.0040, -0.1296,  0.1482, -0.1623,  

Tokens embeddings:
tensor([-0.1035, -0.4979, -0.1744, -0.2692, -0.3834, -0.3100, -0.4708, -0.0592,
        -0.3844, -0.3219, -0.2342, -0.0040, -0.1296,  0.1482, -0.1623,  0.1856,
        -0.3750, -0.3887, -0.4260, -0.1296,  0.0363, -0.1916,  0.0876, -0.1623,
         0.1856, -0.0401, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377])
arg max of tensor([-0.1035, -0.4979, -0.1744, -0.2692, -0.3834, -0.3100, -0.4708, -0.0592,
        -0.3844, -0.3219, -0.2342, -0.0040, -0.1296,  0.1482, -0.1623,  

Tokens embeddings:
tensor([-0.1035, -0.4207,  0.1856, -0.3811, -0.2217, -0.4264, -0.3298, -0.2984,
        -0.5051, -0.3303, -0.4087, -0.0560,  0.0014, -0.3257, -0.4940, -0.3640,
        -0.3451, -0.4488, -0.3298, -0.4376, -0.4410, -0.3622, -0.3469, -0.3723,
        -0.2364, -0.4376, -0.5051, -0.2303, -0.2795, -0.0896, -0.4260, -0.3773,
         0.1856, -0.0401, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377])
arg max of tensor([-0.1035, -0.4207,  0.1856, -0.3811, -0.2217, -0.4264, -0.3298, -0.2984,
        -0.5051, -0.3303, -0.4087, -0.0560,  0.0014, -0.3257, -0.4940, -

Tokens embeddings:
tensor([-0.1035, -0.4207,  0.1856, -0.3811, -0.2217, -0.4264, -0.3298, -0.2984,
        -0.5051, -0.3303, -0.4087, -0.0560,  0.0014, -0.3257, -0.4940, -0.3640,
        -0.3451, -0.4488, -0.3298, -0.4376, -0.4410, -0.3622, -0.3469, -0.3723,
        -0.2364, -0.4376, -0.5051, -0.2303, -0.2795, -0.0896, -0.4260, -0.3773,
         0.1856, -0.0401, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377])
arg max of tensor([-0.1035, -0.4207,  0.1856, -0.3811, -0.2217, -0.4264, -0.3298, -0.2984,
        -0.5051, -0.3303, -0.4087, -0.0560,  0.0014, -0.3257, -0.4940, -

Tokens embeddings:
tensor([-0.1035, -0.4207,  0.1856, -0.3811, -0.2217, -0.4264, -0.3298, -0.2984,
        -0.5051, -0.3303, -0.4087, -0.0560,  0.0014, -0.3257, -0.4940, -0.3640,
        -0.3451, -0.4488, -0.3298, -0.4376, -0.4410, -0.3622, -0.3469, -0.3723,
        -0.2364, -0.4376, -0.5051, -0.2303, -0.2795, -0.0896, -0.4260, -0.3773,
         0.1856, -0.0401, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377])
arg max of tensor([-0.1035, -0.4207,  0.1856, -0.3811, -0.2217, -0.4264, -0.3298, -0.2984,
        -0.5051, -0.3303, -0.4087, -0.0560,  0.0014, -0.3257, -0.4940, -

Tokens embeddings:
tensor([-0.1035, -0.4207,  0.1856, -0.3811, -0.2217, -0.4264, -0.3298, -0.2984,
        -0.5051, -0.3303, -0.4087, -0.0560,  0.0014, -0.3257, -0.4940, -0.3640,
        -0.3451, -0.4488, -0.3298, -0.4376, -0.4410, -0.3622, -0.3469, -0.3723,
        -0.2364, -0.4376, -0.5051, -0.2303, -0.2795, -0.0896, -0.4260, -0.3773,
         0.1856, -0.0401, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377])
arg max of tensor([-0.1035, -0.4207,  0.1856, -0.3811, -0.2217, -0.4264, -0.3298, -0.2984,
        -0.5051, -0.3303, -0.4087, -0.0560,  0.0014, -0.3257, -0.4940, -

Tokens embeddings:
tensor([-0.1035, -0.4207,  0.1856, -0.3811, -0.2217, -0.4264, -0.3298, -0.2984,
        -0.5051, -0.3303, -0.4087, -0.0560,  0.0014, -0.3257, -0.4940, -0.3640,
        -0.3451, -0.4488, -0.3298, -0.4376, -0.4410, -0.3622, -0.3469, -0.3723,
        -0.2364, -0.4376, -0.5051, -0.2303, -0.2795, -0.0896, -0.4260, -0.3773,
         0.1856, -0.0401, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377])
arg max of tensor([-0.1035, -0.4207,  0.1856, -0.3811, -0.2217, -0.4264, -0.3298, -0.2984,
        -0.5051, -0.3303, -0.4087, -0.0560,  0.0014, -0.3257, -0.4940, -

Tokens embeddings:
tensor([-0.1035, -0.4207,  0.1856, -0.3811, -0.2217, -0.4264, -0.3298, -0.2984,
        -0.5051, -0.3303, -0.4087, -0.0560,  0.0014, -0.3257, -0.4940, -0.3640,
        -0.3451, -0.4488, -0.3298, -0.4376, -0.4410, -0.3622, -0.3469, -0.3723,
        -0.2364, -0.4376, -0.5051, -0.2303, -0.2795, -0.0896, -0.4260, -0.3773,
         0.1856, -0.0401, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377])
arg max of tensor([-0.1035, -0.4207,  0.1856, -0.3811, -0.2217, -0.4264, -0.3298, -0.2984,
        -0.5051, -0.3303, -0.4087, -0.0560,  0.0014, -0.3257, -0.4940, -

Tokens embeddings:
tensor([-0.1035, -0.4207,  0.1856, -0.3811, -0.2217, -0.4264, -0.3298, -0.2984,
        -0.5051, -0.3303, -0.4087, -0.0560,  0.0014, -0.3257, -0.4940, -0.3640,
        -0.3451, -0.4488, -0.3298, -0.4376, -0.4410, -0.3622, -0.3469, -0.3723,
        -0.2364, -0.4376, -0.5051, -0.2303, -0.2795, -0.0896, -0.4260, -0.3773,
         0.1856, -0.0401, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377])
arg max of tensor([-0.1035, -0.4207,  0.1856, -0.3811, -0.2217, -0.4264, -0.3298, -0.2984,
        -0.5051, -0.3303, -0.4087, -0.0560,  0.0014, -0.3257, -0.4940, -

Tokens embeddings:
tensor([-0.1035, -0.4207,  0.1856, -0.3811, -0.2217, -0.4264, -0.3298, -0.2984,
        -0.5051, -0.3303, -0.4087, -0.0560,  0.0014, -0.3257, -0.4940, -0.3640,
        -0.3451, -0.4488, -0.3298, -0.4376, -0.4410, -0.3622, -0.3469, -0.3723,
        -0.2364, -0.4376, -0.5051, -0.2303, -0.2795, -0.0896, -0.4260, -0.3773,
         0.1856, -0.0401, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377])
arg max of tensor([-0.1035, -0.4207,  0.1856, -0.3811, -0.2217, -0.4264, -0.3298, -0.2984,
        -0.5051, -0.3303, -0.4087, -0.0560,  0.0014, -0.3257, -0.4940, -

Tokens embeddings:
tensor([-0.1035, -0.4207,  0.1856, -0.3811, -0.2217, -0.4264, -0.3298, -0.2984,
        -0.5051, -0.3303, -0.4087, -0.0560,  0.0014, -0.3257, -0.4940, -0.3640,
        -0.3451, -0.4488, -0.3298, -0.4376, -0.4410, -0.3622, -0.3469, -0.3723,
        -0.2364, -0.4376, -0.5051, -0.2303, -0.2795, -0.0896, -0.4260, -0.3773,
         0.1856, -0.0401, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377])
arg max of tensor([-0.1035, -0.4207,  0.1856, -0.3811, -0.2217, -0.4264, -0.3298, -0.2984,
        -0.5051, -0.3303, -0.4087, -0.0560,  0.0014, -0.3257, -0.4940, -

Tokens embeddings:
tensor([-0.1035, -0.4207,  0.1856, -0.3811, -0.2217, -0.4264, -0.3298, -0.2984,
        -0.5051, -0.3303, -0.4087, -0.0560,  0.0014, -0.3257, -0.4940, -0.3640,
        -0.3451, -0.4488, -0.3298, -0.4376, -0.4410, -0.3622, -0.3469, -0.3723,
        -0.2364, -0.4376, -0.5051, -0.2303, -0.2795, -0.0896, -0.4260, -0.3773,
         0.1856, -0.0401, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377])
arg max of tensor([-0.1035, -0.4207,  0.1856, -0.3811, -0.2217, -0.4264, -0.3298, -0.2984,
        -0.5051, -0.3303, -0.4087, -0.0560,  0.0014, -0.3257, -0.4940, -

Tokens embeddings:
tensor([-0.1035, -0.4207,  0.1856, -0.3811, -0.2217, -0.4264, -0.3298, -0.2984,
        -0.5051, -0.3303, -0.4087, -0.0560,  0.0014, -0.3257, -0.4940, -0.3640,
        -0.3451, -0.4488, -0.3298, -0.4376, -0.4410, -0.3622, -0.3469, -0.3723,
        -0.2364, -0.4376, -0.5051, -0.2303, -0.2795, -0.0896, -0.4260, -0.3773,
         0.1856, -0.0401, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377])
arg max of tensor([-0.1035, -0.4207,  0.1856, -0.3811, -0.2217, -0.4264, -0.3298, -0.2984,
        -0.5051, -0.3303, -0.4087, -0.0560,  0.0014, -0.3257, -0.4940, -

Tokens embeddings:
tensor([-0.1035, -0.4207,  0.1856, -0.3811, -0.2217, -0.4264, -0.3298, -0.2984,
        -0.5051, -0.3303, -0.4087, -0.0560,  0.0014, -0.3257, -0.4940, -0.3640,
        -0.3451, -0.4488, -0.3298, -0.4376, -0.4410, -0.3622, -0.3469, -0.3723,
        -0.2364, -0.4376, -0.5051, -0.2303, -0.2795, -0.0896, -0.4260, -0.3773,
         0.1856, -0.0401, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377])
arg max of tensor([-0.1035, -0.4207,  0.1856, -0.3811, -0.2217, -0.4264, -0.3298, -0.2984,
        -0.5051, -0.3303, -0.4087, -0.0560,  0.0014, -0.3257, -0.4940, -

Tokens embeddings:
tensor([-0.1035, -0.3020, -0.3869, -0.0896, -0.4183, -0.3164, -0.3811, -0.4441,
        -0.4183, -0.3557, -0.3233, -0.4011, -0.4183, -0.3241, -0.3574, -0.1102,
         0.0171, -0.4317, -0.0896, -0.3901, -0.3901, -0.4618, -0.4796, -0.3977,
        -0.3640, -0.1308, -0.3834, -0.1289, -0.4272, -0.3335, -0.3773, -0.4703,
        -0.3127,  0.1856, -0.3834, -0.3834, -0.0401, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377])
arg max of tensor([-0.1035, -0.3020, -0.3869, -0.0896, -0.4183, -0.3164, -0.3811, -0.4441,
        -0.4183, -0.3557, -0.3233, -0.4011, -0.4183, -0.3241, -0.3574, -

Tokens embeddings:
tensor([-0.1035, -0.3020, -0.3869, -0.0896, -0.4183, -0.3164, -0.3811, -0.4441,
        -0.4183, -0.3557, -0.3233, -0.4011, -0.4183, -0.3241, -0.3574, -0.1102,
         0.0171, -0.4317, -0.0896, -0.3901, -0.3901, -0.4618, -0.4796, -0.3977,
        -0.3640, -0.1308, -0.3834, -0.1289, -0.4272, -0.3335, -0.3773, -0.4703,
        -0.3127,  0.1856, -0.3834, -0.3834, -0.0401, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377])
arg max of tensor([-0.1035, -0.3020, -0.3869, -0.0896, -0.4183, -0.3164, -0.3811, -0.4441,
        -0.4183, -0.3557, -0.3233, -0.4011, -0.4183, -0.3241, -0.3574, -

Tokens embeddings:
tensor([-0.1035, -0.3020, -0.3869, -0.0896, -0.4183, -0.3164, -0.3811, -0.4441,
        -0.4183, -0.3557, -0.3233, -0.4011, -0.4183, -0.3241, -0.3574, -0.1102,
         0.0171, -0.4317, -0.0896, -0.3901, -0.3901, -0.4618, -0.4796, -0.3977,
        -0.3640, -0.1308, -0.3834, -0.1289, -0.4272, -0.3335, -0.3773, -0.4703,
        -0.3127,  0.1856, -0.3834, -0.3834, -0.0401, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377])
arg max of tensor([-0.1035, -0.3020, -0.3869, -0.0896, -0.4183, -0.3164, -0.3811, -0.4441,
        -0.4183, -0.3557, -0.3233, -0.4011, -0.4183, -0.3241, -0.3574, -

Tokens embeddings:
tensor([-0.1035, -0.3020, -0.3869, -0.0896, -0.4183, -0.3164, -0.3811, -0.4441,
        -0.4183, -0.3557, -0.3233, -0.4011, -0.4183, -0.3241, -0.3574, -0.1102,
         0.0171, -0.4317, -0.0896, -0.3901, -0.3901, -0.4618, -0.4796, -0.3977,
        -0.3640, -0.1308, -0.3834, -0.1289, -0.4272, -0.3335, -0.3773, -0.4703,
        -0.3127,  0.1856, -0.3834, -0.3834, -0.0401, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377])
arg max of tensor([-0.1035, -0.3020, -0.3869, -0.0896, -0.4183, -0.3164, -0.3811, -0.4441,
        -0.4183, -0.3557, -0.3233, -0.4011, -0.4183, -0.3241, -0.3574, -

Tokens embeddings:
tensor([-0.1035, -0.3020, -0.3869, -0.0896, -0.4183, -0.3164, -0.3811, -0.4441,
        -0.4183, -0.3557, -0.3233, -0.4011, -0.4183, -0.3241, -0.3574, -0.1102,
         0.0171, -0.4317, -0.0896, -0.3901, -0.3901, -0.4618, -0.4796, -0.3977,
        -0.3640, -0.1308, -0.3834, -0.1289, -0.4272, -0.3335, -0.3773, -0.4703,
        -0.3127,  0.1856, -0.3834, -0.3834, -0.0401, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377])
arg max of tensor([-0.1035, -0.3020, -0.3869, -0.0896, -0.4183, -0.3164, -0.3811, -0.4441,
        -0.4183, -0.3557, -0.3233, -0.4011, -0.4183, -0.3241, -0.3574, -

Tokens embeddings:
tensor([-0.1035, -0.3020, -0.3869, -0.0896, -0.4183, -0.3164, -0.3811, -0.4441,
        -0.4183, -0.3557, -0.3233, -0.4011, -0.4183, -0.3241, -0.3574, -0.1102,
         0.0171, -0.4317, -0.0896, -0.3901, -0.3901, -0.4618, -0.4796, -0.3977,
        -0.3640, -0.1308, -0.3834, -0.1289, -0.4272, -0.3335, -0.3773, -0.4703,
        -0.3127,  0.1856, -0.3834, -0.3834, -0.0401, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377])
arg max of tensor([-0.1035, -0.3020, -0.3869, -0.0896, -0.4183, -0.3164, -0.3811, -0.4441,
        -0.4183, -0.3557, -0.3233, -0.4011, -0.4183, -0.3241, -0.3574, -

Tokens embeddings:
tensor([-0.1035, -0.3020, -0.3869, -0.0896, -0.4183, -0.3164, -0.3811, -0.4441,
        -0.4183, -0.3557, -0.3233, -0.4011, -0.4183, -0.3241, -0.3574, -0.1102,
         0.0171, -0.4317, -0.0896, -0.3901, -0.3901, -0.4618, -0.4796, -0.3977,
        -0.3640, -0.1308, -0.3834, -0.1289, -0.4272, -0.3335, -0.3773, -0.4703,
        -0.3127,  0.1856, -0.3834, -0.3834, -0.0401, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377])
arg max of tensor([-0.1035, -0.3020, -0.3869, -0.0896, -0.4183, -0.3164, -0.3811, -0.4441,
        -0.4183, -0.3557, -0.3233, -0.4011, -0.4183, -0.3241, -0.3574, -

Tokens embeddings:
tensor([-0.1035, -0.3020, -0.3869, -0.0896, -0.4183, -0.3164, -0.3811, -0.4441,
        -0.4183, -0.3557, -0.3233, -0.4011, -0.4183, -0.3241, -0.3574, -0.1102,
         0.0171, -0.4317, -0.0896, -0.3901, -0.3901, -0.4618, -0.4796, -0.3977,
        -0.3640, -0.1308, -0.3834, -0.1289, -0.4272, -0.3335, -0.3773, -0.4703,
        -0.3127,  0.1856, -0.3834, -0.3834, -0.0401, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377])
arg max of tensor([-0.1035, -0.3020, -0.3869, -0.0896, -0.4183, -0.3164, -0.3811, -0.4441,
        -0.4183, -0.3557, -0.3233, -0.4011, -0.4183, -0.3241, -0.3574, -

Tokens embeddings:
tensor([-0.1035, -0.3020, -0.3869, -0.0896, -0.4183, -0.3164, -0.3811, -0.4441,
        -0.4183, -0.3557, -0.3233, -0.4011, -0.4183, -0.3241, -0.3574, -0.1102,
         0.0171, -0.4317, -0.0896, -0.3901, -0.3901, -0.4618, -0.4796, -0.3977,
        -0.3640, -0.1308, -0.3834, -0.1289, -0.4272, -0.3335, -0.3773, -0.4703,
        -0.3127,  0.1856, -0.3834, -0.3834, -0.0401, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377])
arg max of tensor([-0.1035, -0.3020, -0.3869, -0.0896, -0.4183, -0.3164, -0.3811, -0.4441,
        -0.4183, -0.3557, -0.3233, -0.4011, -0.4183, -0.3241, -0.3574, -

Tokens embeddings:
tensor([-0.1035, -0.3020, -0.3869, -0.0896, -0.4183, -0.3164, -0.3811, -0.4441,
        -0.4183, -0.3557, -0.3233, -0.4011, -0.4183, -0.3241, -0.3574, -0.1102,
         0.0171, -0.4317, -0.0896, -0.3901, -0.3901, -0.4618, -0.4796, -0.3977,
        -0.3640, -0.1308, -0.3834, -0.1289, -0.4272, -0.3335, -0.3773, -0.4703,
        -0.3127,  0.1856, -0.3834, -0.3834, -0.0401, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377])
arg max of tensor([-0.1035, -0.3020, -0.3869, -0.0896, -0.4183, -0.3164, -0.3811, -0.4441,
        -0.4183, -0.3557, -0.3233, -0.4011, -0.4183, -0.3241, -0.3574, -

Tokens embeddings:
tensor([-0.1035, -0.3020, -0.3869, -0.0896, -0.4183, -0.3164, -0.3811, -0.4441,
        -0.4183, -0.3557, -0.3233, -0.4011, -0.4183, -0.3241, -0.3574, -0.1102,
         0.0171, -0.4317, -0.0896, -0.3901, -0.3901, -0.4618, -0.4796, -0.3977,
        -0.3640, -0.1308, -0.3834, -0.1289, -0.4272, -0.3335, -0.3773, -0.4703,
        -0.3127,  0.1856, -0.3834, -0.3834, -0.0401, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377])
arg max of tensor([-0.1035, -0.3020, -0.3869, -0.0896, -0.4183, -0.3164, -0.3811, -0.4441,
        -0.4183, -0.3557, -0.3233, -0.4011, -0.4183, -0.3241, -0.3574, -

Tokens embeddings:
tensor([-0.1035, -0.3020, -0.3869, -0.0896, -0.4183, -0.3164, -0.3811, -0.4441,
        -0.4183, -0.3557, -0.3233, -0.4011, -0.4183, -0.3241, -0.3574, -0.1102,
         0.0171, -0.4317, -0.0896, -0.3901, -0.3901, -0.4618, -0.4796, -0.3977,
        -0.3640, -0.1308, -0.3834, -0.1289, -0.4272, -0.3335, -0.3773, -0.4703,
        -0.3127,  0.1856, -0.3834, -0.3834, -0.0401, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377])
arg max of tensor([-0.1035, -0.3020, -0.3869, -0.0896, -0.4183, -0.3164, -0.3811, -0.4441,
        -0.4183, -0.3557, -0.3233, -0.4011, -0.4183, -0.3241, -0.3574, -

Tokens embeddings:
tensor([-0.1035, -0.4330, -0.4780, -0.2528, -0.3489, -0.3105, -0.0534, -0.2406,
        -0.0592, -0.4364, -0.0040, -0.3611, -0.0448, -0.2714, -0.1785, -0.2670,
        -0.0896, -0.3811, -0.2791, -0.1741, -0.3583, -0.2945, -0.2364, -0.1741,
        -0.2694,  0.1856, -0.0401, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377])
arg max of tensor([-0.1035, -0.4330, -0.4780, -0.2528, -0.3489, -0.3105, -0.0534, -0.2406,
        -0.0592, -0.4364, -0.0040, -0.3611, -0.0448, -0.2714, -0.1785, -

Tokens embeddings:
tensor([-0.1035, -0.4330, -0.4780, -0.2528, -0.3489, -0.3105, -0.0534, -0.2406,
        -0.0592, -0.4364, -0.0040, -0.3611, -0.0448, -0.2714, -0.1785, -0.2670,
        -0.0896, -0.3811, -0.2791, -0.1741, -0.3583, -0.2945, -0.2364, -0.1741,
        -0.2694,  0.1856, -0.0401, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377])
arg max of tensor([-0.1035, -0.4330, -0.4780, -0.2528, -0.3489, -0.3105, -0.0534, -0.2406,
        -0.0592, -0.4364, -0.0040, -0.3611, -0.0448, -0.2714, -0.1785, -

Tokens embeddings:
tensor([-0.1035, -0.4330, -0.4780, -0.2528, -0.3489, -0.3105, -0.0534, -0.2406,
        -0.0592, -0.4364, -0.0040, -0.3611, -0.0448, -0.2714, -0.1785, -0.2670,
        -0.0896, -0.3811, -0.2791, -0.1741, -0.3583, -0.2945, -0.2364, -0.1741,
        -0.2694,  0.1856, -0.0401, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377])
arg max of tensor([-0.1035, -0.4330, -0.4780, -0.2528, -0.3489, -0.3105, -0.0534, -0.2406,
        -0.0592, -0.4364, -0.0040, -0.3611, -0.0448, -0.2714, -0.1785, -

Tokens embeddings:
tensor([-0.1035, -0.4330, -0.4780, -0.2528, -0.3489, -0.3105, -0.0534, -0.2406,
        -0.0592, -0.4364, -0.0040, -0.3611, -0.0448, -0.2714, -0.1785, -0.2670,
        -0.0896, -0.3811, -0.2791, -0.1741, -0.3583, -0.2945, -0.2364, -0.1741,
        -0.2694,  0.1856, -0.0401, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377])
arg max of tensor([-0.1035, -0.4330, -0.4780, -0.2528, -0.3489, -0.3105, -0.0534, -0.2406,
        -0.0592, -0.4364, -0.0040, -0.3611, -0.0448, -0.2714, -0.1785, -

Tokens embeddings:
tensor([-0.1035, -0.4330, -0.4780, -0.2528, -0.3489, -0.3105, -0.0534, -0.2406,
        -0.0592, -0.4364, -0.0040, -0.3611, -0.0448, -0.2714, -0.1785, -0.2670,
        -0.0896, -0.3811, -0.2791, -0.1741, -0.3583, -0.2945, -0.2364, -0.1741,
        -0.2694,  0.1856, -0.0401, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377])
arg max of tensor([-0.1035, -0.4330, -0.4780, -0.2528, -0.3489, -0.3105, -0.0534, -0.2406,
        -0.0592, -0.4364, -0.0040, -0.3611, -0.0448, -0.2714, -0.1785, -

Tokens embeddings:
tensor([-0.1035, -0.4330, -0.4780, -0.2528, -0.3489, -0.3105, -0.0534, -0.2406,
        -0.0592, -0.4364, -0.0040, -0.3611, -0.0448, -0.2714, -0.1785, -0.2670,
        -0.0896, -0.3811, -0.2791, -0.1741, -0.3583, -0.2945, -0.2364, -0.1741,
        -0.2694,  0.1856, -0.0401, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377])
arg max of tensor([-0.1035, -0.4330, -0.4780, -0.2528, -0.3489, -0.3105, -0.0534, -0.2406,
        -0.0592, -0.4364, -0.0040, -0.3611, -0.0448, -0.2714, -0.1785, -

Tokens embeddings:
tensor([-0.1035, -0.4330, -0.4780, -0.2528, -0.3489, -0.3105, -0.0534, -0.2406,
        -0.0592, -0.4364, -0.0040, -0.3611, -0.0448, -0.2714, -0.1785, -0.2670,
        -0.0896, -0.3811, -0.2791, -0.1741, -0.3583, -0.2945, -0.2364, -0.1741,
        -0.2694,  0.1856, -0.0401, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377])
arg max of tensor([-0.1035, -0.4330, -0.4780, -0.2528, -0.3489, -0.3105, -0.0534, -0.2406,
        -0.0592, -0.4364, -0.0040, -0.3611, -0.0448, -0.2714, -0.1785, -

Tokens embeddings:
tensor([-0.1035, -0.4330, -0.4780, -0.2528, -0.3489, -0.3105, -0.0534, -0.2406,
        -0.0592, -0.4364, -0.0040, -0.3611, -0.0448, -0.2714, -0.1785, -0.2670,
        -0.0896, -0.3811, -0.2791, -0.1741, -0.3583, -0.2945, -0.2364, -0.1741,
        -0.2694,  0.1856, -0.0401, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377])
arg max of tensor([-0.1035, -0.4330, -0.4780, -0.2528, -0.3489, -0.3105, -0.0534, -0.2406,
        -0.0592, -0.4364, -0.0040, -0.3611, -0.0448, -0.2714, -0.1785, -

Tokens embeddings:
tensor([-0.1035, -0.4330, -0.4780, -0.2528, -0.3489, -0.3105, -0.0534, -0.2406,
        -0.0592, -0.4364, -0.0040, -0.3611, -0.0448, -0.2714, -0.1785, -0.2670,
        -0.0896, -0.3811, -0.2791, -0.1741, -0.3583, -0.2945, -0.2364, -0.1741,
        -0.2694,  0.1856, -0.0401, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377])
arg max of tensor([-0.1035, -0.4330, -0.4780, -0.2528, -0.3489, -0.3105, -0.0534, -0.2406,
        -0.0592, -0.4364, -0.0040, -0.3611, -0.0448, -0.2714, -0.1785, -

Tokens embeddings:
tensor([-0.1035, -0.4330, -0.4780, -0.2528, -0.3489, -0.3105, -0.0534, -0.2406,
        -0.0592, -0.4364, -0.0040, -0.3611, -0.0448, -0.2714, -0.1785, -0.2670,
        -0.0896, -0.3811, -0.2791, -0.1741, -0.3583, -0.2945, -0.2364, -0.1741,
        -0.2694,  0.1856, -0.0401, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377])
arg max of tensor([-0.1035, -0.4330, -0.4780, -0.2528, -0.3489, -0.3105, -0.0534, -0.2406,
        -0.0592, -0.4364, -0.0040, -0.3611, -0.0448, -0.2714, -0.1785, -

Tokens embeddings:
tensor([-0.1035, -0.4330, -0.4780, -0.2528, -0.3489, -0.3105, -0.0534, -0.2406,
        -0.0592, -0.4364, -0.0040, -0.3611, -0.0448, -0.2714, -0.1785, -0.2670,
        -0.0896, -0.3811, -0.2791, -0.1741, -0.3583, -0.2945, -0.2364, -0.1741,
        -0.2694,  0.1856, -0.0401, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377])
arg max of tensor([-0.1035, -0.4330, -0.4780, -0.2528, -0.3489, -0.3105, -0.0534, -0.2406,
        -0.0592, -0.4364, -0.0040, -0.3611, -0.0448, -0.2714, -0.1785, -

Tokens embeddings:
tensor([-0.1035, -0.4330, -0.4780, -0.2528, -0.3489, -0.3105, -0.0534, -0.2406,
        -0.0592, -0.4364, -0.0040, -0.3611, -0.0448, -0.2714, -0.1785, -0.2670,
        -0.0896, -0.3811, -0.2791, -0.1741, -0.3583, -0.2945, -0.2364, -0.1741,
        -0.2694,  0.1856, -0.0401, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377])
arg max of tensor([-0.1035, -0.4330, -0.4780, -0.2528, -0.3489, -0.3105, -0.0534, -0.2406,
        -0.0592, -0.4364, -0.0040, -0.3611, -0.0448, -0.2714, -0.1785, -

In [31]:
# Max Pooling for all tokens in sentences and argmax

# Select some sentences randomly
sent_index = [0,1,2,3,4,5,6,7,8,9,10]

for s in sent_index:
  print("*" * 100)
  # Get the sentence's words
  tokens = test_inputs[s]
  # For each layer...
  for l in range(12):
    print("\nLayer", l+1)
    attention = np.squeeze(test_attentions[s][l].detach().cpu().numpy(), axis=0)
    # and for each head
    #print("Head", h+1)
    # Get the attention for the cls token
    encoded_tokens = bert_tokenizer(tokens, truncation=True, padding=True, max_length=128, return_tensors='pt')
    encoded_tokens = encoded_tokens.to(device)
    with torch.no_grad():
      model_output1 = model_e(**encoded_tokens)
      tokens_embeddings = max_pooling(model_output1, encoded_tokens['attention_mask'])
      tokens_embeddings = tokens_embeddings.cpu()
      cls_attentions = head[0]
      display(HTML(colorize(tokens, cls_attentions)))
      print("Tokens embeddings:")
      print(tokens_embeddings)
      arg = argmax(tokens_embeddings)
      print('arg max of %s: %d' % (tokens_embeddings, arg))

****************************************************************************************************

Layer 1


Tokens embeddings:
tensor([-1.0346e-01, -4.4878e-01, -9.2825e-02, -1.4480e-01, -1.0000e+09,
        -1.1771e-01, -3.6351e-01, -1.5942e-01, -7.3015e-02, -1.0000e+09,
        -1.6289e-01, -3.3029e-01, -1.2150e-01, -8.9630e-02, -1.0000e+09,
        -1.7178e-01, -2.9962e-01, -1.0016e-01, -9.2825e-02, -1.0000e+09,
        -6.2173e-02, -1.1879e-01, -1.7709e-01, -2.1991e-01, -1.0000e+09,
        -6.2173e-02, -1.7413e-01, -2.1756e-02, -1.1693e-01, -1.0000e+09,
        -4.0138e-02, -3.7658e-02, -1.6941e-01, -3.7658e-02, -1.0000e+09,
        -3.7658e-02, -3.7658e-02, -1.6941e-01, -3.7658e-02, -1.0000e+09,
        -3.7658e-02, -3.7658e-02, -3.7658e-02, -3.7658e-02, -1.0000e+09,
        -3.7658e-02, -3.7658e-02, -1.6941e-01, -3.7658e-02, -1.0000e+09,
        -3.7658e-02, -3.7658e-02, -1.6941e-01, -3.7658e-02, -1.0000e+09,
        -3.7658e-02, -3.7658e-02, -1.6941e-01, -3.7658e-02, -1.0000e+09,
        -3.7658e-02, -3.7658e-02, -1.6941e-01, -3.7658e-02, -1.0000e+09,
        -3.7658e-02, -3.7658e-02

Tokens embeddings:
tensor([-1.0346e-01, -4.4878e-01, -9.2825e-02, -1.4480e-01, -1.0000e+09,
        -1.1771e-01, -3.6351e-01, -1.5942e-01, -7.3015e-02, -1.0000e+09,
        -1.6289e-01, -3.3029e-01, -1.2150e-01, -8.9630e-02, -1.0000e+09,
        -1.7178e-01, -2.9962e-01, -1.0016e-01, -9.2825e-02, -1.0000e+09,
        -6.2173e-02, -1.1879e-01, -1.7709e-01, -2.1991e-01, -1.0000e+09,
        -6.2173e-02, -1.7413e-01, -2.1756e-02, -1.1693e-01, -1.0000e+09,
        -4.0138e-02, -3.7658e-02, -1.6941e-01, -3.7658e-02, -1.0000e+09,
        -3.7658e-02, -3.7658e-02, -1.6941e-01, -3.7658e-02, -1.0000e+09,
        -3.7658e-02, -3.7658e-02, -3.7658e-02, -3.7658e-02, -1.0000e+09,
        -3.7658e-02, -3.7658e-02, -1.6941e-01, -3.7658e-02, -1.0000e+09,
        -3.7658e-02, -3.7658e-02, -1.6941e-01, -3.7658e-02, -1.0000e+09,
        -3.7658e-02, -3.7658e-02, -1.6941e-01, -3.7658e-02, -1.0000e+09,
        -3.7658e-02, -3.7658e-02, -1.6941e-01, -3.7658e-02, -1.0000e+09,
        -3.7658e-02, -3.7658e-02

Tokens embeddings:
tensor([-1.0346e-01, -4.4878e-01, -9.2825e-02, -1.4480e-01, -1.0000e+09,
        -1.1771e-01, -3.6351e-01, -1.5942e-01, -7.3015e-02, -1.0000e+09,
        -1.6289e-01, -3.3029e-01, -1.2150e-01, -8.9630e-02, -1.0000e+09,
        -1.7178e-01, -2.9962e-01, -1.0016e-01, -9.2825e-02, -1.0000e+09,
        -6.2173e-02, -1.1879e-01, -1.7709e-01, -2.1991e-01, -1.0000e+09,
        -6.2173e-02, -1.7413e-01, -2.1756e-02, -1.1693e-01, -1.0000e+09,
        -4.0138e-02, -3.7658e-02, -1.6941e-01, -3.7658e-02, -1.0000e+09,
        -3.7658e-02, -3.7658e-02, -1.6941e-01, -3.7658e-02, -1.0000e+09,
        -3.7658e-02, -3.7658e-02, -3.7658e-02, -3.7658e-02, -1.0000e+09,
        -3.7658e-02, -3.7658e-02, -1.6941e-01, -3.7658e-02, -1.0000e+09,
        -3.7658e-02, -3.7658e-02, -1.6941e-01, -3.7658e-02, -1.0000e+09,
        -3.7658e-02, -3.7658e-02, -1.6941e-01, -3.7658e-02, -1.0000e+09,
        -3.7658e-02, -3.7658e-02, -1.6941e-01, -3.7658e-02, -1.0000e+09,
        -3.7658e-02, -3.7658e-02

Tokens embeddings:
tensor([-1.0346e-01, -4.4878e-01, -9.2825e-02, -1.4480e-01, -1.0000e+09,
        -1.1771e-01, -3.6351e-01, -1.5942e-01, -7.3015e-02, -1.0000e+09,
        -1.6289e-01, -3.3029e-01, -1.2150e-01, -8.9630e-02, -1.0000e+09,
        -1.7178e-01, -2.9962e-01, -1.0016e-01, -9.2825e-02, -1.0000e+09,
        -6.2173e-02, -1.1879e-01, -1.7709e-01, -2.1991e-01, -1.0000e+09,
        -6.2173e-02, -1.7413e-01, -2.1756e-02, -1.1693e-01, -1.0000e+09,
        -4.0138e-02, -3.7658e-02, -1.6941e-01, -3.7658e-02, -1.0000e+09,
        -3.7658e-02, -3.7658e-02, -1.6941e-01, -3.7658e-02, -1.0000e+09,
        -3.7658e-02, -3.7658e-02, -3.7658e-02, -3.7658e-02, -1.0000e+09,
        -3.7658e-02, -3.7658e-02, -1.6941e-01, -3.7658e-02, -1.0000e+09,
        -3.7658e-02, -3.7658e-02, -1.6941e-01, -3.7658e-02, -1.0000e+09,
        -3.7658e-02, -3.7658e-02, -1.6941e-01, -3.7658e-02, -1.0000e+09,
        -3.7658e-02, -3.7658e-02, -1.6941e-01, -3.7658e-02, -1.0000e+09,
        -3.7658e-02, -3.7658e-02

Tokens embeddings:
tensor([-1.0346e-01, -4.4878e-01, -9.2825e-02, -1.4480e-01, -1.0000e+09,
        -1.1771e-01, -3.6351e-01, -1.5942e-01, -7.3015e-02, -1.0000e+09,
        -1.6289e-01, -3.3029e-01, -1.2150e-01, -8.9630e-02, -1.0000e+09,
        -1.7178e-01, -2.9962e-01, -1.0016e-01, -9.2825e-02, -1.0000e+09,
        -6.2173e-02, -1.1879e-01, -1.7709e-01, -2.1991e-01, -1.0000e+09,
        -6.2173e-02, -1.7413e-01, -2.1756e-02, -1.1693e-01, -1.0000e+09,
        -4.0138e-02, -3.7658e-02, -1.6941e-01, -3.7658e-02, -1.0000e+09,
        -3.7658e-02, -3.7658e-02, -1.6941e-01, -3.7658e-02, -1.0000e+09,
        -3.7658e-02, -3.7658e-02, -3.7658e-02, -3.7658e-02, -1.0000e+09,
        -3.7658e-02, -3.7658e-02, -1.6941e-01, -3.7658e-02, -1.0000e+09,
        -3.7658e-02, -3.7658e-02, -1.6941e-01, -3.7658e-02, -1.0000e+09,
        -3.7658e-02, -3.7658e-02, -1.6941e-01, -3.7658e-02, -1.0000e+09,
        -3.7658e-02, -3.7658e-02, -1.6941e-01, -3.7658e-02, -1.0000e+09,
        -3.7658e-02, -3.7658e-02

Tokens embeddings:
tensor([-1.0346e-01, -4.4878e-01, -9.2825e-02, -1.4480e-01, -1.0000e+09,
        -1.1771e-01, -3.6351e-01, -1.5942e-01, -7.3015e-02, -1.0000e+09,
        -1.6289e-01, -3.3029e-01, -1.2150e-01, -8.9630e-02, -1.0000e+09,
        -1.7178e-01, -2.9962e-01, -1.0016e-01, -9.2825e-02, -1.0000e+09,
        -6.2173e-02, -1.1879e-01, -1.7709e-01, -2.1991e-01, -1.0000e+09,
        -6.2173e-02, -1.7413e-01, -2.1756e-02, -1.1693e-01, -1.0000e+09,
        -4.0138e-02, -3.7658e-02, -1.6941e-01, -3.7658e-02, -1.0000e+09,
        -3.7658e-02, -3.7658e-02, -1.6941e-01, -3.7658e-02, -1.0000e+09,
        -3.7658e-02, -3.7658e-02, -3.7658e-02, -3.7658e-02, -1.0000e+09,
        -3.7658e-02, -3.7658e-02, -1.6941e-01, -3.7658e-02, -1.0000e+09,
        -3.7658e-02, -3.7658e-02, -1.6941e-01, -3.7658e-02, -1.0000e+09,
        -3.7658e-02, -3.7658e-02, -1.6941e-01, -3.7658e-02, -1.0000e+09,
        -3.7658e-02, -3.7658e-02, -1.6941e-01, -3.7658e-02, -1.0000e+09,
        -3.7658e-02, -3.7658e-02

Tokens embeddings:
tensor([-1.0346e-01, -4.4878e-01, -9.2825e-02, -1.4480e-01, -1.0000e+09,
        -1.1771e-01, -3.6351e-01, -1.5942e-01, -7.3015e-02, -1.0000e+09,
        -1.6289e-01, -3.3029e-01, -1.2150e-01, -8.9630e-02, -1.0000e+09,
        -1.7178e-01, -2.9962e-01, -1.0016e-01, -9.2825e-02, -1.0000e+09,
        -6.2173e-02, -1.1879e-01, -1.7709e-01, -2.1991e-01, -1.0000e+09,
        -6.2173e-02, -1.7413e-01, -2.1756e-02, -1.1693e-01, -1.0000e+09,
        -4.0138e-02, -3.7658e-02, -1.6941e-01, -3.7658e-02, -1.0000e+09,
        -3.7658e-02, -3.7658e-02, -1.6941e-01, -3.7658e-02, -1.0000e+09,
        -3.7658e-02, -3.7658e-02, -3.7658e-02, -3.7658e-02, -1.0000e+09,
        -3.7658e-02, -3.7658e-02, -1.6941e-01, -3.7658e-02, -1.0000e+09,
        -3.7658e-02, -3.7658e-02, -1.6941e-01, -3.7658e-02, -1.0000e+09,
        -3.7658e-02, -3.7658e-02, -1.6941e-01, -3.7658e-02, -1.0000e+09,
        -3.7658e-02, -3.7658e-02, -1.6941e-01, -3.7658e-02, -1.0000e+09,
        -3.7658e-02, -3.7658e-02

Tokens embeddings:
tensor([-1.0346e-01, -4.4878e-01, -9.2825e-02, -1.4480e-01, -1.0000e+09,
        -1.1771e-01, -3.6351e-01, -1.5942e-01, -7.3015e-02, -1.0000e+09,
        -1.6289e-01, -3.3029e-01, -1.2150e-01, -8.9630e-02, -1.0000e+09,
        -1.7178e-01, -2.9962e-01, -1.0016e-01, -9.2825e-02, -1.0000e+09,
        -6.2173e-02, -1.1879e-01, -1.7709e-01, -2.1991e-01, -1.0000e+09,
        -6.2173e-02, -1.7413e-01, -2.1756e-02, -1.1693e-01, -1.0000e+09,
        -4.0138e-02, -3.7658e-02, -1.6941e-01, -3.7658e-02, -1.0000e+09,
        -3.7658e-02, -3.7658e-02, -1.6941e-01, -3.7658e-02, -1.0000e+09,
        -3.7658e-02, -3.7658e-02, -3.7658e-02, -3.7658e-02, -1.0000e+09,
        -3.7658e-02, -3.7658e-02, -1.6941e-01, -3.7658e-02, -1.0000e+09,
        -3.7658e-02, -3.7658e-02, -1.6941e-01, -3.7658e-02, -1.0000e+09,
        -3.7658e-02, -3.7658e-02, -1.6941e-01, -3.7658e-02, -1.0000e+09,
        -3.7658e-02, -3.7658e-02, -1.6941e-01, -3.7658e-02, -1.0000e+09,
        -3.7658e-02, -3.7658e-02

Tokens embeddings:
tensor([-1.0346e-01, -4.4878e-01, -9.2825e-02, -1.4480e-01, -1.0000e+09,
        -1.1771e-01, -3.6351e-01, -1.5942e-01, -7.3015e-02, -1.0000e+09,
        -1.6289e-01, -3.3029e-01, -1.2150e-01, -8.9630e-02, -1.0000e+09,
        -1.7178e-01, -2.9962e-01, -1.0016e-01, -9.2825e-02, -1.0000e+09,
        -6.2173e-02, -1.1879e-01, -1.7709e-01, -2.1991e-01, -1.0000e+09,
        -6.2173e-02, -1.7413e-01, -2.1756e-02, -1.1693e-01, -1.0000e+09,
        -4.0138e-02, -3.7658e-02, -1.6941e-01, -3.7658e-02, -1.0000e+09,
        -3.7658e-02, -3.7658e-02, -1.6941e-01, -3.7658e-02, -1.0000e+09,
        -3.7658e-02, -3.7658e-02, -3.7658e-02, -3.7658e-02, -1.0000e+09,
        -3.7658e-02, -3.7658e-02, -1.6941e-01, -3.7658e-02, -1.0000e+09,
        -3.7658e-02, -3.7658e-02, -1.6941e-01, -3.7658e-02, -1.0000e+09,
        -3.7658e-02, -3.7658e-02, -1.6941e-01, -3.7658e-02, -1.0000e+09,
        -3.7658e-02, -3.7658e-02, -1.6941e-01, -3.7658e-02, -1.0000e+09,
        -3.7658e-02, -3.7658e-02

Tokens embeddings:
tensor([-1.0346e-01, -4.4878e-01, -9.2825e-02, -1.4480e-01, -1.0000e+09,
        -1.1771e-01, -3.6351e-01, -1.5942e-01, -7.3015e-02, -1.0000e+09,
        -1.6289e-01, -3.3029e-01, -1.2150e-01, -8.9630e-02, -1.0000e+09,
        -1.7178e-01, -2.9962e-01, -1.0016e-01, -9.2825e-02, -1.0000e+09,
        -6.2173e-02, -1.1879e-01, -1.7709e-01, -2.1991e-01, -1.0000e+09,
        -6.2173e-02, -1.7413e-01, -2.1756e-02, -1.1693e-01, -1.0000e+09,
        -4.0138e-02, -3.7658e-02, -1.6941e-01, -3.7658e-02, -1.0000e+09,
        -3.7658e-02, -3.7658e-02, -1.6941e-01, -3.7658e-02, -1.0000e+09,
        -3.7658e-02, -3.7658e-02, -3.7658e-02, -3.7658e-02, -1.0000e+09,
        -3.7658e-02, -3.7658e-02, -1.6941e-01, -3.7658e-02, -1.0000e+09,
        -3.7658e-02, -3.7658e-02, -1.6941e-01, -3.7658e-02, -1.0000e+09,
        -3.7658e-02, -3.7658e-02, -1.6941e-01, -3.7658e-02, -1.0000e+09,
        -3.7658e-02, -3.7658e-02, -1.6941e-01, -3.7658e-02, -1.0000e+09,
        -3.7658e-02, -3.7658e-02

Tokens embeddings:
tensor([-1.0346e-01, -4.4878e-01, -9.2825e-02, -1.4480e-01, -1.0000e+09,
        -1.1771e-01, -3.6351e-01, -1.5942e-01, -7.3015e-02, -1.0000e+09,
        -1.6289e-01, -3.3029e-01, -1.2150e-01, -8.9630e-02, -1.0000e+09,
        -1.7178e-01, -2.9962e-01, -1.0016e-01, -9.2825e-02, -1.0000e+09,
        -6.2173e-02, -1.1879e-01, -1.7709e-01, -2.1991e-01, -1.0000e+09,
        -6.2173e-02, -1.7413e-01, -2.1756e-02, -1.1693e-01, -1.0000e+09,
        -4.0138e-02, -3.7658e-02, -1.6941e-01, -3.7658e-02, -1.0000e+09,
        -3.7658e-02, -3.7658e-02, -1.6941e-01, -3.7658e-02, -1.0000e+09,
        -3.7658e-02, -3.7658e-02, -3.7658e-02, -3.7658e-02, -1.0000e+09,
        -3.7658e-02, -3.7658e-02, -1.6941e-01, -3.7658e-02, -1.0000e+09,
        -3.7658e-02, -3.7658e-02, -1.6941e-01, -3.7658e-02, -1.0000e+09,
        -3.7658e-02, -3.7658e-02, -1.6941e-01, -3.7658e-02, -1.0000e+09,
        -3.7658e-02, -3.7658e-02, -1.6941e-01, -3.7658e-02, -1.0000e+09,
        -3.7658e-02, -3.7658e-02

Tokens embeddings:
tensor([-1.0346e-01, -4.4878e-01, -9.2825e-02, -1.4480e-01, -1.0000e+09,
        -1.1771e-01, -3.6351e-01, -1.5942e-01, -7.3015e-02, -1.0000e+09,
        -1.6289e-01, -3.3029e-01, -1.2150e-01, -8.9630e-02, -1.0000e+09,
        -1.7178e-01, -2.9962e-01, -1.0016e-01, -9.2825e-02, -1.0000e+09,
        -6.2173e-02, -1.1879e-01, -1.7709e-01, -2.1991e-01, -1.0000e+09,
        -6.2173e-02, -1.7413e-01, -2.1756e-02, -1.1693e-01, -1.0000e+09,
        -4.0138e-02, -3.7658e-02, -1.6941e-01, -3.7658e-02, -1.0000e+09,
        -3.7658e-02, -3.7658e-02, -1.6941e-01, -3.7658e-02, -1.0000e+09,
        -3.7658e-02, -3.7658e-02, -3.7658e-02, -3.7658e-02, -1.0000e+09,
        -3.7658e-02, -3.7658e-02, -1.6941e-01, -3.7658e-02, -1.0000e+09,
        -3.7658e-02, -3.7658e-02, -1.6941e-01, -3.7658e-02, -1.0000e+09,
        -3.7658e-02, -3.7658e-02, -1.6941e-01, -3.7658e-02, -1.0000e+09,
        -3.7658e-02, -3.7658e-02, -1.6941e-01, -3.7658e-02, -1.0000e+09,
        -3.7658e-02, -3.7658e-02

Tokens embeddings:
tensor([-1.0346e-01, -4.3449e-01, -1.3170e-01, -1.0672e-01, -1.0000e+09,
        -1.2094e-01, -1.4362e-01, -1.4073e-01, -3.7920e-03, -1.0000e+09,
        -1.0000e-01, -3.5433e-01, -1.5558e-01, -6.9166e-02, -1.0000e+09,
        -1.4233e-01, -8.0162e-02, -9.5430e-02, -1.1226e-01,  1.8556e-01,
        -4.0138e-02, -3.7658e-02, -3.7658e-02, -3.7658e-02, -1.0000e+09,
        -3.7658e-02, -3.7658e-02, -1.6941e-01, -3.7658e-02, -1.0000e+09,
        -3.7658e-02, -3.7658e-02, -3.7658e-02, -3.7658e-02, -1.0000e+09,
        -3.7658e-02, -3.7658e-02, -1.6941e-01, -3.7658e-02, -1.0000e+09,
        -3.7658e-02, -3.7658e-02, -1.6941e-01, -3.7658e-02, -1.0000e+09,
        -3.7658e-02, -3.7658e-02, -1.6941e-01, -3.7658e-02, -1.0000e+09,
        -3.7658e-02, -3.7658e-02, -1.6941e-01, -3.7658e-02, -1.0000e+09,
        -3.7658e-02, -3.7658e-02, -1.6941e-01, -3.7658e-02, -1.0000e+09,
        -3.7658e-02, -3.7658e-02, -1.6941e-01, -3.7658e-02, -1.0000e+09,
        -3.7658e-02, -3.7658e-02

Tokens embeddings:
tensor([-1.0346e-01, -4.3449e-01, -1.3170e-01, -1.0672e-01, -1.0000e+09,
        -1.2094e-01, -1.4362e-01, -1.4073e-01, -3.7920e-03, -1.0000e+09,
        -1.0000e-01, -3.5433e-01, -1.5558e-01, -6.9166e-02, -1.0000e+09,
        -1.4233e-01, -8.0162e-02, -9.5430e-02, -1.1226e-01,  1.8556e-01,
        -4.0138e-02, -3.7658e-02, -3.7658e-02, -3.7658e-02, -1.0000e+09,
        -3.7658e-02, -3.7658e-02, -1.6941e-01, -3.7658e-02, -1.0000e+09,
        -3.7658e-02, -3.7658e-02, -3.7658e-02, -3.7658e-02, -1.0000e+09,
        -3.7658e-02, -3.7658e-02, -1.6941e-01, -3.7658e-02, -1.0000e+09,
        -3.7658e-02, -3.7658e-02, -1.6941e-01, -3.7658e-02, -1.0000e+09,
        -3.7658e-02, -3.7658e-02, -1.6941e-01, -3.7658e-02, -1.0000e+09,
        -3.7658e-02, -3.7658e-02, -1.6941e-01, -3.7658e-02, -1.0000e+09,
        -3.7658e-02, -3.7658e-02, -1.6941e-01, -3.7658e-02, -1.0000e+09,
        -3.7658e-02, -3.7658e-02, -1.6941e-01, -3.7658e-02, -1.0000e+09,
        -3.7658e-02, -3.7658e-02

Tokens embeddings:
tensor([-1.0346e-01, -4.3449e-01, -1.3170e-01, -1.0672e-01, -1.0000e+09,
        -1.2094e-01, -1.4362e-01, -1.4073e-01, -3.7920e-03, -1.0000e+09,
        -1.0000e-01, -3.5433e-01, -1.5558e-01, -6.9166e-02, -1.0000e+09,
        -1.4233e-01, -8.0162e-02, -9.5430e-02, -1.1226e-01,  1.8556e-01,
        -4.0138e-02, -3.7658e-02, -3.7658e-02, -3.7658e-02, -1.0000e+09,
        -3.7658e-02, -3.7658e-02, -1.6941e-01, -3.7658e-02, -1.0000e+09,
        -3.7658e-02, -3.7658e-02, -3.7658e-02, -3.7658e-02, -1.0000e+09,
        -3.7658e-02, -3.7658e-02, -1.6941e-01, -3.7658e-02, -1.0000e+09,
        -3.7658e-02, -3.7658e-02, -1.6941e-01, -3.7658e-02, -1.0000e+09,
        -3.7658e-02, -3.7658e-02, -1.6941e-01, -3.7658e-02, -1.0000e+09,
        -3.7658e-02, -3.7658e-02, -1.6941e-01, -3.7658e-02, -1.0000e+09,
        -3.7658e-02, -3.7658e-02, -1.6941e-01, -3.7658e-02, -1.0000e+09,
        -3.7658e-02, -3.7658e-02, -1.6941e-01, -3.7658e-02, -1.0000e+09,
        -3.7658e-02, -3.7658e-02

Tokens embeddings:
tensor([-1.0346e-01, -4.3449e-01, -1.3170e-01, -1.0672e-01, -1.0000e+09,
        -1.2094e-01, -1.4362e-01, -1.4073e-01, -3.7920e-03, -1.0000e+09,
        -1.0000e-01, -3.5433e-01, -1.5558e-01, -6.9166e-02, -1.0000e+09,
        -1.4233e-01, -8.0162e-02, -9.5430e-02, -1.1226e-01,  1.8556e-01,
        -4.0138e-02, -3.7658e-02, -3.7658e-02, -3.7658e-02, -1.0000e+09,
        -3.7658e-02, -3.7658e-02, -1.6941e-01, -3.7658e-02, -1.0000e+09,
        -3.7658e-02, -3.7658e-02, -3.7658e-02, -3.7658e-02, -1.0000e+09,
        -3.7658e-02, -3.7658e-02, -1.6941e-01, -3.7658e-02, -1.0000e+09,
        -3.7658e-02, -3.7658e-02, -1.6941e-01, -3.7658e-02, -1.0000e+09,
        -3.7658e-02, -3.7658e-02, -1.6941e-01, -3.7658e-02, -1.0000e+09,
        -3.7658e-02, -3.7658e-02, -1.6941e-01, -3.7658e-02, -1.0000e+09,
        -3.7658e-02, -3.7658e-02, -1.6941e-01, -3.7658e-02, -1.0000e+09,
        -3.7658e-02, -3.7658e-02, -1.6941e-01, -3.7658e-02, -1.0000e+09,
        -3.7658e-02, -3.7658e-02

Tokens embeddings:
tensor([-1.0346e-01, -4.3449e-01, -1.3170e-01, -1.0672e-01, -1.0000e+09,
        -1.2094e-01, -1.4362e-01, -1.4073e-01, -3.7920e-03, -1.0000e+09,
        -1.0000e-01, -3.5433e-01, -1.5558e-01, -6.9166e-02, -1.0000e+09,
        -1.4233e-01, -8.0162e-02, -9.5430e-02, -1.1226e-01,  1.8556e-01,
        -4.0138e-02, -3.7658e-02, -3.7658e-02, -3.7658e-02, -1.0000e+09,
        -3.7658e-02, -3.7658e-02, -1.6941e-01, -3.7658e-02, -1.0000e+09,
        -3.7658e-02, -3.7658e-02, -3.7658e-02, -3.7658e-02, -1.0000e+09,
        -3.7658e-02, -3.7658e-02, -1.6941e-01, -3.7658e-02, -1.0000e+09,
        -3.7658e-02, -3.7658e-02, -1.6941e-01, -3.7658e-02, -1.0000e+09,
        -3.7658e-02, -3.7658e-02, -1.6941e-01, -3.7658e-02, -1.0000e+09,
        -3.7658e-02, -3.7658e-02, -1.6941e-01, -3.7658e-02, -1.0000e+09,
        -3.7658e-02, -3.7658e-02, -1.6941e-01, -3.7658e-02, -1.0000e+09,
        -3.7658e-02, -3.7658e-02, -1.6941e-01, -3.7658e-02, -1.0000e+09,
        -3.7658e-02, -3.7658e-02

Tokens embeddings:
tensor([-1.0346e-01, -4.3449e-01, -1.3170e-01, -1.0672e-01, -1.0000e+09,
        -1.2094e-01, -1.4362e-01, -1.4073e-01, -3.7920e-03, -1.0000e+09,
        -1.0000e-01, -3.5433e-01, -1.5558e-01, -6.9166e-02, -1.0000e+09,
        -1.4233e-01, -8.0162e-02, -9.5430e-02, -1.1226e-01,  1.8556e-01,
        -4.0138e-02, -3.7658e-02, -3.7658e-02, -3.7658e-02, -1.0000e+09,
        -3.7658e-02, -3.7658e-02, -1.6941e-01, -3.7658e-02, -1.0000e+09,
        -3.7658e-02, -3.7658e-02, -3.7658e-02, -3.7658e-02, -1.0000e+09,
        -3.7658e-02, -3.7658e-02, -1.6941e-01, -3.7658e-02, -1.0000e+09,
        -3.7658e-02, -3.7658e-02, -1.6941e-01, -3.7658e-02, -1.0000e+09,
        -3.7658e-02, -3.7658e-02, -1.6941e-01, -3.7658e-02, -1.0000e+09,
        -3.7658e-02, -3.7658e-02, -1.6941e-01, -3.7658e-02, -1.0000e+09,
        -3.7658e-02, -3.7658e-02, -1.6941e-01, -3.7658e-02, -1.0000e+09,
        -3.7658e-02, -3.7658e-02, -1.6941e-01, -3.7658e-02, -1.0000e+09,
        -3.7658e-02, -3.7658e-02

Tokens embeddings:
tensor([-1.0346e-01, -4.3449e-01, -1.3170e-01, -1.0672e-01, -1.0000e+09,
        -1.2094e-01, -1.4362e-01, -1.4073e-01, -3.7920e-03, -1.0000e+09,
        -1.0000e-01, -3.5433e-01, -1.5558e-01, -6.9166e-02, -1.0000e+09,
        -1.4233e-01, -8.0162e-02, -9.5430e-02, -1.1226e-01,  1.8556e-01,
        -4.0138e-02, -3.7658e-02, -3.7658e-02, -3.7658e-02, -1.0000e+09,
        -3.7658e-02, -3.7658e-02, -1.6941e-01, -3.7658e-02, -1.0000e+09,
        -3.7658e-02, -3.7658e-02, -3.7658e-02, -3.7658e-02, -1.0000e+09,
        -3.7658e-02, -3.7658e-02, -1.6941e-01, -3.7658e-02, -1.0000e+09,
        -3.7658e-02, -3.7658e-02, -1.6941e-01, -3.7658e-02, -1.0000e+09,
        -3.7658e-02, -3.7658e-02, -1.6941e-01, -3.7658e-02, -1.0000e+09,
        -3.7658e-02, -3.7658e-02, -1.6941e-01, -3.7658e-02, -1.0000e+09,
        -3.7658e-02, -3.7658e-02, -1.6941e-01, -3.7658e-02, -1.0000e+09,
        -3.7658e-02, -3.7658e-02, -1.6941e-01, -3.7658e-02, -1.0000e+09,
        -3.7658e-02, -3.7658e-02

Tokens embeddings:
tensor([-1.0346e-01, -4.3449e-01, -1.3170e-01, -1.0672e-01, -1.0000e+09,
        -1.2094e-01, -1.4362e-01, -1.4073e-01, -3.7920e-03, -1.0000e+09,
        -1.0000e-01, -3.5433e-01, -1.5558e-01, -6.9166e-02, -1.0000e+09,
        -1.4233e-01, -8.0162e-02, -9.5430e-02, -1.1226e-01,  1.8556e-01,
        -4.0138e-02, -3.7658e-02, -3.7658e-02, -3.7658e-02, -1.0000e+09,
        -3.7658e-02, -3.7658e-02, -1.6941e-01, -3.7658e-02, -1.0000e+09,
        -3.7658e-02, -3.7658e-02, -3.7658e-02, -3.7658e-02, -1.0000e+09,
        -3.7658e-02, -3.7658e-02, -1.6941e-01, -3.7658e-02, -1.0000e+09,
        -3.7658e-02, -3.7658e-02, -1.6941e-01, -3.7658e-02, -1.0000e+09,
        -3.7658e-02, -3.7658e-02, -1.6941e-01, -3.7658e-02, -1.0000e+09,
        -3.7658e-02, -3.7658e-02, -1.6941e-01, -3.7658e-02, -1.0000e+09,
        -3.7658e-02, -3.7658e-02, -1.6941e-01, -3.7658e-02, -1.0000e+09,
        -3.7658e-02, -3.7658e-02, -1.6941e-01, -3.7658e-02, -1.0000e+09,
        -3.7658e-02, -3.7658e-02

Tokens embeddings:
tensor([-1.0346e-01, -4.3449e-01, -1.3170e-01, -1.0672e-01, -1.0000e+09,
        -1.2094e-01, -1.4362e-01, -1.4073e-01, -3.7920e-03, -1.0000e+09,
        -1.0000e-01, -3.5433e-01, -1.5558e-01, -6.9166e-02, -1.0000e+09,
        -1.4233e-01, -8.0162e-02, -9.5430e-02, -1.1226e-01,  1.8556e-01,
        -4.0138e-02, -3.7658e-02, -3.7658e-02, -3.7658e-02, -1.0000e+09,
        -3.7658e-02, -3.7658e-02, -1.6941e-01, -3.7658e-02, -1.0000e+09,
        -3.7658e-02, -3.7658e-02, -3.7658e-02, -3.7658e-02, -1.0000e+09,
        -3.7658e-02, -3.7658e-02, -1.6941e-01, -3.7658e-02, -1.0000e+09,
        -3.7658e-02, -3.7658e-02, -1.6941e-01, -3.7658e-02, -1.0000e+09,
        -3.7658e-02, -3.7658e-02, -1.6941e-01, -3.7658e-02, -1.0000e+09,
        -3.7658e-02, -3.7658e-02, -1.6941e-01, -3.7658e-02, -1.0000e+09,
        -3.7658e-02, -3.7658e-02, -1.6941e-01, -3.7658e-02, -1.0000e+09,
        -3.7658e-02, -3.7658e-02, -1.6941e-01, -3.7658e-02, -1.0000e+09,
        -3.7658e-02, -3.7658e-02

Tokens embeddings:
tensor([-1.0346e-01, -4.3449e-01, -1.3170e-01, -1.0672e-01, -1.0000e+09,
        -1.2094e-01, -1.4362e-01, -1.4073e-01, -3.7920e-03, -1.0000e+09,
        -1.0000e-01, -3.5433e-01, -1.5558e-01, -6.9166e-02, -1.0000e+09,
        -1.4233e-01, -8.0162e-02, -9.5430e-02, -1.1226e-01,  1.8556e-01,
        -4.0138e-02, -3.7658e-02, -3.7658e-02, -3.7658e-02, -1.0000e+09,
        -3.7658e-02, -3.7658e-02, -1.6941e-01, -3.7658e-02, -1.0000e+09,
        -3.7658e-02, -3.7658e-02, -3.7658e-02, -3.7658e-02, -1.0000e+09,
        -3.7658e-02, -3.7658e-02, -1.6941e-01, -3.7658e-02, -1.0000e+09,
        -3.7658e-02, -3.7658e-02, -1.6941e-01, -3.7658e-02, -1.0000e+09,
        -3.7658e-02, -3.7658e-02, -1.6941e-01, -3.7658e-02, -1.0000e+09,
        -3.7658e-02, -3.7658e-02, -1.6941e-01, -3.7658e-02, -1.0000e+09,
        -3.7658e-02, -3.7658e-02, -1.6941e-01, -3.7658e-02, -1.0000e+09,
        -3.7658e-02, -3.7658e-02, -1.6941e-01, -3.7658e-02, -1.0000e+09,
        -3.7658e-02, -3.7658e-02

Tokens embeddings:
tensor([-1.0346e-01, -4.3449e-01, -1.3170e-01, -1.0672e-01, -1.0000e+09,
        -1.2094e-01, -1.4362e-01, -1.4073e-01, -3.7920e-03, -1.0000e+09,
        -1.0000e-01, -3.5433e-01, -1.5558e-01, -6.9166e-02, -1.0000e+09,
        -1.4233e-01, -8.0162e-02, -9.5430e-02, -1.1226e-01,  1.8556e-01,
        -4.0138e-02, -3.7658e-02, -3.7658e-02, -3.7658e-02, -1.0000e+09,
        -3.7658e-02, -3.7658e-02, -1.6941e-01, -3.7658e-02, -1.0000e+09,
        -3.7658e-02, -3.7658e-02, -3.7658e-02, -3.7658e-02, -1.0000e+09,
        -3.7658e-02, -3.7658e-02, -1.6941e-01, -3.7658e-02, -1.0000e+09,
        -3.7658e-02, -3.7658e-02, -1.6941e-01, -3.7658e-02, -1.0000e+09,
        -3.7658e-02, -3.7658e-02, -1.6941e-01, -3.7658e-02, -1.0000e+09,
        -3.7658e-02, -3.7658e-02, -1.6941e-01, -3.7658e-02, -1.0000e+09,
        -3.7658e-02, -3.7658e-02, -1.6941e-01, -3.7658e-02, -1.0000e+09,
        -3.7658e-02, -3.7658e-02, -1.6941e-01, -3.7658e-02, -1.0000e+09,
        -3.7658e-02, -3.7658e-02

Tokens embeddings:
tensor([-1.0346e-01, -4.3449e-01, -1.3170e-01, -1.0672e-01, -1.0000e+09,
        -1.2094e-01, -1.4362e-01, -1.4073e-01, -3.7920e-03, -1.0000e+09,
        -1.0000e-01, -3.5433e-01, -1.5558e-01, -6.9166e-02, -1.0000e+09,
        -1.4233e-01, -8.0162e-02, -9.5430e-02, -1.1226e-01,  1.8556e-01,
        -4.0138e-02, -3.7658e-02, -3.7658e-02, -3.7658e-02, -1.0000e+09,
        -3.7658e-02, -3.7658e-02, -1.6941e-01, -3.7658e-02, -1.0000e+09,
        -3.7658e-02, -3.7658e-02, -3.7658e-02, -3.7658e-02, -1.0000e+09,
        -3.7658e-02, -3.7658e-02, -1.6941e-01, -3.7658e-02, -1.0000e+09,
        -3.7658e-02, -3.7658e-02, -1.6941e-01, -3.7658e-02, -1.0000e+09,
        -3.7658e-02, -3.7658e-02, -1.6941e-01, -3.7658e-02, -1.0000e+09,
        -3.7658e-02, -3.7658e-02, -1.6941e-01, -3.7658e-02, -1.0000e+09,
        -3.7658e-02, -3.7658e-02, -1.6941e-01, -3.7658e-02, -1.0000e+09,
        -3.7658e-02, -3.7658e-02, -1.6941e-01, -3.7658e-02, -1.0000e+09,
        -3.7658e-02, -3.7658e-02

Tokens embeddings:
tensor([-1.0346e-01, -4.1832e-01, -1.0000e+09, -1.3659e-01, -3.3351e-01,
        -1.0000e+09, -1.1502e-01, -5.9233e-02, -1.0000e+09, -1.0891e-01,
        -4.0188e-03, -1.0000e+09, -1.3700e-01, -9.9771e-02, -1.0000e+09,
        -1.7709e-01,  1.0700e-01, -1.0000e+09, -1.7379e-01, -4.0188e-03,
        -1.0000e+09, -1.5385e-03, -3.2257e-01, -1.0000e+09, -6.9521e-02,
        -4.6625e-01, -1.0000e+09, -1.7709e-01, -1.9111e-01, -1.0000e+09,
        -1.1500e-01, -1.5032e-01, -1.0000e+09, -4.1365e-02,  1.8556e-01,
        -1.0000e+09, -3.7658e-02, -3.7658e-02, -1.0000e+09, -3.7658e-02,
        -3.7658e-02, -1.0000e+09, -3.7658e-02, -3.7658e-02, -1.0000e+09,
        -3.7658e-02, -3.7658e-02, -1.0000e+09, -3.7658e-02, -3.7658e-02,
        -1.0000e+09, -3.7658e-02, -3.7658e-02, -1.0000e+09, -3.7658e-02,
        -3.7658e-02, -3.7658e-02, -3.7658e-02, -3.7658e-02, -1.0000e+09,
        -3.7658e-02, -3.7658e-02, -1.0000e+09, -3.7658e-02, -3.7658e-02,
        -1.0000e+09, -3.7658e-02

Tokens embeddings:
tensor([-1.0346e-01, -4.1832e-01, -1.0000e+09, -1.3659e-01, -3.3351e-01,
        -1.0000e+09, -1.1502e-01, -5.9233e-02, -1.0000e+09, -1.0891e-01,
        -4.0188e-03, -1.0000e+09, -1.3700e-01, -9.9771e-02, -1.0000e+09,
        -1.7709e-01,  1.0700e-01, -1.0000e+09, -1.7379e-01, -4.0188e-03,
        -1.0000e+09, -1.5385e-03, -3.2257e-01, -1.0000e+09, -6.9521e-02,
        -4.6625e-01, -1.0000e+09, -1.7709e-01, -1.9111e-01, -1.0000e+09,
        -1.1500e-01, -1.5032e-01, -1.0000e+09, -4.1365e-02,  1.8556e-01,
        -1.0000e+09, -3.7658e-02, -3.7658e-02, -1.0000e+09, -3.7658e-02,
        -3.7658e-02, -1.0000e+09, -3.7658e-02, -3.7658e-02, -1.0000e+09,
        -3.7658e-02, -3.7658e-02, -1.0000e+09, -3.7658e-02, -3.7658e-02,
        -1.0000e+09, -3.7658e-02, -3.7658e-02, -1.0000e+09, -3.7658e-02,
        -3.7658e-02, -3.7658e-02, -3.7658e-02, -3.7658e-02, -1.0000e+09,
        -3.7658e-02, -3.7658e-02, -1.0000e+09, -3.7658e-02, -3.7658e-02,
        -1.0000e+09, -3.7658e-02

Tokens embeddings:
tensor([-1.0346e-01, -4.1832e-01, -1.0000e+09, -1.3659e-01, -3.3351e-01,
        -1.0000e+09, -1.1502e-01, -5.9233e-02, -1.0000e+09, -1.0891e-01,
        -4.0188e-03, -1.0000e+09, -1.3700e-01, -9.9771e-02, -1.0000e+09,
        -1.7709e-01,  1.0700e-01, -1.0000e+09, -1.7379e-01, -4.0188e-03,
        -1.0000e+09, -1.5385e-03, -3.2257e-01, -1.0000e+09, -6.9521e-02,
        -4.6625e-01, -1.0000e+09, -1.7709e-01, -1.9111e-01, -1.0000e+09,
        -1.1500e-01, -1.5032e-01, -1.0000e+09, -4.1365e-02,  1.8556e-01,
        -1.0000e+09, -3.7658e-02, -3.7658e-02, -1.0000e+09, -3.7658e-02,
        -3.7658e-02, -1.0000e+09, -3.7658e-02, -3.7658e-02, -1.0000e+09,
        -3.7658e-02, -3.7658e-02, -1.0000e+09, -3.7658e-02, -3.7658e-02,
        -1.0000e+09, -3.7658e-02, -3.7658e-02, -1.0000e+09, -3.7658e-02,
        -3.7658e-02, -3.7658e-02, -3.7658e-02, -3.7658e-02, -1.0000e+09,
        -3.7658e-02, -3.7658e-02, -1.0000e+09, -3.7658e-02, -3.7658e-02,
        -1.0000e+09, -3.7658e-02

Tokens embeddings:
tensor([-1.0346e-01, -4.1832e-01, -1.0000e+09, -1.3659e-01, -3.3351e-01,
        -1.0000e+09, -1.1502e-01, -5.9233e-02, -1.0000e+09, -1.0891e-01,
        -4.0188e-03, -1.0000e+09, -1.3700e-01, -9.9771e-02, -1.0000e+09,
        -1.7709e-01,  1.0700e-01, -1.0000e+09, -1.7379e-01, -4.0188e-03,
        -1.0000e+09, -1.5385e-03, -3.2257e-01, -1.0000e+09, -6.9521e-02,
        -4.6625e-01, -1.0000e+09, -1.7709e-01, -1.9111e-01, -1.0000e+09,
        -1.1500e-01, -1.5032e-01, -1.0000e+09, -4.1365e-02,  1.8556e-01,
        -1.0000e+09, -3.7658e-02, -3.7658e-02, -1.0000e+09, -3.7658e-02,
        -3.7658e-02, -1.0000e+09, -3.7658e-02, -3.7658e-02, -1.0000e+09,
        -3.7658e-02, -3.7658e-02, -1.0000e+09, -3.7658e-02, -3.7658e-02,
        -1.0000e+09, -3.7658e-02, -3.7658e-02, -1.0000e+09, -3.7658e-02,
        -3.7658e-02, -3.7658e-02, -3.7658e-02, -3.7658e-02, -1.0000e+09,
        -3.7658e-02, -3.7658e-02, -1.0000e+09, -3.7658e-02, -3.7658e-02,
        -1.0000e+09, -3.7658e-02

Tokens embeddings:
tensor([-1.0346e-01, -4.1832e-01, -1.0000e+09, -1.3659e-01, -3.3351e-01,
        -1.0000e+09, -1.1502e-01, -5.9233e-02, -1.0000e+09, -1.0891e-01,
        -4.0188e-03, -1.0000e+09, -1.3700e-01, -9.9771e-02, -1.0000e+09,
        -1.7709e-01,  1.0700e-01, -1.0000e+09, -1.7379e-01, -4.0188e-03,
        -1.0000e+09, -1.5385e-03, -3.2257e-01, -1.0000e+09, -6.9521e-02,
        -4.6625e-01, -1.0000e+09, -1.7709e-01, -1.9111e-01, -1.0000e+09,
        -1.1500e-01, -1.5032e-01, -1.0000e+09, -4.1365e-02,  1.8556e-01,
        -1.0000e+09, -3.7658e-02, -3.7658e-02, -1.0000e+09, -3.7658e-02,
        -3.7658e-02, -1.0000e+09, -3.7658e-02, -3.7658e-02, -1.0000e+09,
        -3.7658e-02, -3.7658e-02, -1.0000e+09, -3.7658e-02, -3.7658e-02,
        -1.0000e+09, -3.7658e-02, -3.7658e-02, -1.0000e+09, -3.7658e-02,
        -3.7658e-02, -3.7658e-02, -3.7658e-02, -3.7658e-02, -1.0000e+09,
        -3.7658e-02, -3.7658e-02, -1.0000e+09, -3.7658e-02, -3.7658e-02,
        -1.0000e+09, -3.7658e-02

Tokens embeddings:
tensor([-1.0346e-01, -4.1832e-01, -1.0000e+09, -1.3659e-01, -3.3351e-01,
        -1.0000e+09, -1.1502e-01, -5.9233e-02, -1.0000e+09, -1.0891e-01,
        -4.0188e-03, -1.0000e+09, -1.3700e-01, -9.9771e-02, -1.0000e+09,
        -1.7709e-01,  1.0700e-01, -1.0000e+09, -1.7379e-01, -4.0188e-03,
        -1.0000e+09, -1.5385e-03, -3.2257e-01, -1.0000e+09, -6.9521e-02,
        -4.6625e-01, -1.0000e+09, -1.7709e-01, -1.9111e-01, -1.0000e+09,
        -1.1500e-01, -1.5032e-01, -1.0000e+09, -4.1365e-02,  1.8556e-01,
        -1.0000e+09, -3.7658e-02, -3.7658e-02, -1.0000e+09, -3.7658e-02,
        -3.7658e-02, -1.0000e+09, -3.7658e-02, -3.7658e-02, -1.0000e+09,
        -3.7658e-02, -3.7658e-02, -1.0000e+09, -3.7658e-02, -3.7658e-02,
        -1.0000e+09, -3.7658e-02, -3.7658e-02, -1.0000e+09, -3.7658e-02,
        -3.7658e-02, -3.7658e-02, -3.7658e-02, -3.7658e-02, -1.0000e+09,
        -3.7658e-02, -3.7658e-02, -1.0000e+09, -3.7658e-02, -3.7658e-02,
        -1.0000e+09, -3.7658e-02

Tokens embeddings:
tensor([-1.0346e-01, -4.1832e-01, -1.0000e+09, -1.3659e-01, -3.3351e-01,
        -1.0000e+09, -1.1502e-01, -5.9233e-02, -1.0000e+09, -1.0891e-01,
        -4.0188e-03, -1.0000e+09, -1.3700e-01, -9.9771e-02, -1.0000e+09,
        -1.7709e-01,  1.0700e-01, -1.0000e+09, -1.7379e-01, -4.0188e-03,
        -1.0000e+09, -1.5385e-03, -3.2257e-01, -1.0000e+09, -6.9521e-02,
        -4.6625e-01, -1.0000e+09, -1.7709e-01, -1.9111e-01, -1.0000e+09,
        -1.1500e-01, -1.5032e-01, -1.0000e+09, -4.1365e-02,  1.8556e-01,
        -1.0000e+09, -3.7658e-02, -3.7658e-02, -1.0000e+09, -3.7658e-02,
        -3.7658e-02, -1.0000e+09, -3.7658e-02, -3.7658e-02, -1.0000e+09,
        -3.7658e-02, -3.7658e-02, -1.0000e+09, -3.7658e-02, -3.7658e-02,
        -1.0000e+09, -3.7658e-02, -3.7658e-02, -1.0000e+09, -3.7658e-02,
        -3.7658e-02, -3.7658e-02, -3.7658e-02, -3.7658e-02, -1.0000e+09,
        -3.7658e-02, -3.7658e-02, -1.0000e+09, -3.7658e-02, -3.7658e-02,
        -1.0000e+09, -3.7658e-02

Tokens embeddings:
tensor([-1.0346e-01, -4.1832e-01, -1.0000e+09, -1.3659e-01, -3.3351e-01,
        -1.0000e+09, -1.1502e-01, -5.9233e-02, -1.0000e+09, -1.0891e-01,
        -4.0188e-03, -1.0000e+09, -1.3700e-01, -9.9771e-02, -1.0000e+09,
        -1.7709e-01,  1.0700e-01, -1.0000e+09, -1.7379e-01, -4.0188e-03,
        -1.0000e+09, -1.5385e-03, -3.2257e-01, -1.0000e+09, -6.9521e-02,
        -4.6625e-01, -1.0000e+09, -1.7709e-01, -1.9111e-01, -1.0000e+09,
        -1.1500e-01, -1.5032e-01, -1.0000e+09, -4.1365e-02,  1.8556e-01,
        -1.0000e+09, -3.7658e-02, -3.7658e-02, -1.0000e+09, -3.7658e-02,
        -3.7658e-02, -1.0000e+09, -3.7658e-02, -3.7658e-02, -1.0000e+09,
        -3.7658e-02, -3.7658e-02, -1.0000e+09, -3.7658e-02, -3.7658e-02,
        -1.0000e+09, -3.7658e-02, -3.7658e-02, -1.0000e+09, -3.7658e-02,
        -3.7658e-02, -3.7658e-02, -3.7658e-02, -3.7658e-02, -1.0000e+09,
        -3.7658e-02, -3.7658e-02, -1.0000e+09, -3.7658e-02, -3.7658e-02,
        -1.0000e+09, -3.7658e-02

Tokens embeddings:
tensor([-1.0346e-01, -4.1832e-01, -1.0000e+09, -1.3659e-01, -3.3351e-01,
        -1.0000e+09, -1.1502e-01, -5.9233e-02, -1.0000e+09, -1.0891e-01,
        -4.0188e-03, -1.0000e+09, -1.3700e-01, -9.9771e-02, -1.0000e+09,
        -1.7709e-01,  1.0700e-01, -1.0000e+09, -1.7379e-01, -4.0188e-03,
        -1.0000e+09, -1.5385e-03, -3.2257e-01, -1.0000e+09, -6.9521e-02,
        -4.6625e-01, -1.0000e+09, -1.7709e-01, -1.9111e-01, -1.0000e+09,
        -1.1500e-01, -1.5032e-01, -1.0000e+09, -4.1365e-02,  1.8556e-01,
        -1.0000e+09, -3.7658e-02, -3.7658e-02, -1.0000e+09, -3.7658e-02,
        -3.7658e-02, -1.0000e+09, -3.7658e-02, -3.7658e-02, -1.0000e+09,
        -3.7658e-02, -3.7658e-02, -1.0000e+09, -3.7658e-02, -3.7658e-02,
        -1.0000e+09, -3.7658e-02, -3.7658e-02, -1.0000e+09, -3.7658e-02,
        -3.7658e-02, -3.7658e-02, -3.7658e-02, -3.7658e-02, -1.0000e+09,
        -3.7658e-02, -3.7658e-02, -1.0000e+09, -3.7658e-02, -3.7658e-02,
        -1.0000e+09, -3.7658e-02

Tokens embeddings:
tensor([-1.0346e-01, -4.1832e-01, -1.0000e+09, -1.3659e-01, -3.3351e-01,
        -1.0000e+09, -1.1502e-01, -5.9233e-02, -1.0000e+09, -1.0891e-01,
        -4.0188e-03, -1.0000e+09, -1.3700e-01, -9.9771e-02, -1.0000e+09,
        -1.7709e-01,  1.0700e-01, -1.0000e+09, -1.7379e-01, -4.0188e-03,
        -1.0000e+09, -1.5385e-03, -3.2257e-01, -1.0000e+09, -6.9521e-02,
        -4.6625e-01, -1.0000e+09, -1.7709e-01, -1.9111e-01, -1.0000e+09,
        -1.1500e-01, -1.5032e-01, -1.0000e+09, -4.1365e-02,  1.8556e-01,
        -1.0000e+09, -3.7658e-02, -3.7658e-02, -1.0000e+09, -3.7658e-02,
        -3.7658e-02, -1.0000e+09, -3.7658e-02, -3.7658e-02, -1.0000e+09,
        -3.7658e-02, -3.7658e-02, -1.0000e+09, -3.7658e-02, -3.7658e-02,
        -1.0000e+09, -3.7658e-02, -3.7658e-02, -1.0000e+09, -3.7658e-02,
        -3.7658e-02, -3.7658e-02, -3.7658e-02, -3.7658e-02, -1.0000e+09,
        -3.7658e-02, -3.7658e-02, -1.0000e+09, -3.7658e-02, -3.7658e-02,
        -1.0000e+09, -3.7658e-02

Tokens embeddings:
tensor([-1.0346e-01, -4.1832e-01, -1.0000e+09, -1.3659e-01, -3.3351e-01,
        -1.0000e+09, -1.1502e-01, -5.9233e-02, -1.0000e+09, -1.0891e-01,
        -4.0188e-03, -1.0000e+09, -1.3700e-01, -9.9771e-02, -1.0000e+09,
        -1.7709e-01,  1.0700e-01, -1.0000e+09, -1.7379e-01, -4.0188e-03,
        -1.0000e+09, -1.5385e-03, -3.2257e-01, -1.0000e+09, -6.9521e-02,
        -4.6625e-01, -1.0000e+09, -1.7709e-01, -1.9111e-01, -1.0000e+09,
        -1.1500e-01, -1.5032e-01, -1.0000e+09, -4.1365e-02,  1.8556e-01,
        -1.0000e+09, -3.7658e-02, -3.7658e-02, -1.0000e+09, -3.7658e-02,
        -3.7658e-02, -1.0000e+09, -3.7658e-02, -3.7658e-02, -1.0000e+09,
        -3.7658e-02, -3.7658e-02, -1.0000e+09, -3.7658e-02, -3.7658e-02,
        -1.0000e+09, -3.7658e-02, -3.7658e-02, -1.0000e+09, -3.7658e-02,
        -3.7658e-02, -3.7658e-02, -3.7658e-02, -3.7658e-02, -1.0000e+09,
        -3.7658e-02, -3.7658e-02, -1.0000e+09, -3.7658e-02, -3.7658e-02,
        -1.0000e+09, -3.7658e-02

Tokens embeddings:
tensor([-1.0346e-01, -4.1832e-01, -1.0000e+09, -1.3659e-01, -3.3351e-01,
        -1.0000e+09, -1.1502e-01, -5.9233e-02, -1.0000e+09, -1.0891e-01,
        -4.0188e-03, -1.0000e+09, -1.3700e-01, -9.9771e-02, -1.0000e+09,
        -1.7709e-01,  1.0700e-01, -1.0000e+09, -1.7379e-01, -4.0188e-03,
        -1.0000e+09, -1.5385e-03, -3.2257e-01, -1.0000e+09, -6.9521e-02,
        -4.6625e-01, -1.0000e+09, -1.7709e-01, -1.9111e-01, -1.0000e+09,
        -1.1500e-01, -1.5032e-01, -1.0000e+09, -4.1365e-02,  1.8556e-01,
        -1.0000e+09, -3.7658e-02, -3.7658e-02, -1.0000e+09, -3.7658e-02,
        -3.7658e-02, -1.0000e+09, -3.7658e-02, -3.7658e-02, -1.0000e+09,
        -3.7658e-02, -3.7658e-02, -1.0000e+09, -3.7658e-02, -3.7658e-02,
        -1.0000e+09, -3.7658e-02, -3.7658e-02, -1.0000e+09, -3.7658e-02,
        -3.7658e-02, -3.7658e-02, -3.7658e-02, -3.7658e-02, -1.0000e+09,
        -3.7658e-02, -3.7658e-02, -1.0000e+09, -3.7658e-02, -3.7658e-02,
        -1.0000e+09, -3.7658e-02

Tokens embeddings:
tensor([-1.0346e-01, -4.6178e-01, -1.0000e+09, -8.9630e-02, -4.1832e-01,
        -1.0000e+09, -1.2216e-01, -4.1971e-01, -1.0000e+09, -1.2252e-01,
        -2.1932e-01, -1.0000e+09, -1.1936e-01, -3.2639e-01, -1.0000e+09,
        -9.0660e-02, -2.2722e-01, -1.0000e+09, -8.0235e-02, -2.9854e-01,
        -1.0000e+09, -1.3470e-01, -2.1412e-01, -1.0000e+09, -4.0188e-03,
        -5.9550e-02, -1.3126e-01, -2.1756e-02, -2.4856e-01, -1.0000e+09,
        -1.6998e-01, -2.6069e-01, -1.0000e+09, -6.9521e-02, -3.5735e-01,
        -1.0000e+09, -1.4696e-01, -3.4281e-01, -1.0000e+09, -5.1311e-02,
        -1.1236e-01, -1.0000e+09, -4.0138e-02, -3.7658e-02, -1.0000e+09,
        -3.7658e-02, -3.7658e-02, -1.0000e+09, -3.7658e-02, -3.7658e-02,
        -1.0000e+09, -3.7658e-02, -3.7658e-02, -1.0000e+09, -3.7658e-02,
        -3.7658e-02, -1.0000e+09, -3.7658e-02, -3.7658e-02, -1.0000e+09,
        -3.7658e-02, -3.7658e-02, -1.0000e+09, -3.7658e-02, -3.7658e-02,
        -1.0000e+09, -3.7658e-02

Tokens embeddings:
tensor([-1.0346e-01, -4.6178e-01, -1.0000e+09, -8.9630e-02, -4.1832e-01,
        -1.0000e+09, -1.2216e-01, -4.1971e-01, -1.0000e+09, -1.2252e-01,
        -2.1932e-01, -1.0000e+09, -1.1936e-01, -3.2639e-01, -1.0000e+09,
        -9.0660e-02, -2.2722e-01, -1.0000e+09, -8.0235e-02, -2.9854e-01,
        -1.0000e+09, -1.3470e-01, -2.1412e-01, -1.0000e+09, -4.0188e-03,
        -5.9550e-02, -1.3126e-01, -2.1756e-02, -2.4856e-01, -1.0000e+09,
        -1.6998e-01, -2.6069e-01, -1.0000e+09, -6.9521e-02, -3.5735e-01,
        -1.0000e+09, -1.4696e-01, -3.4281e-01, -1.0000e+09, -5.1311e-02,
        -1.1236e-01, -1.0000e+09, -4.0138e-02, -3.7658e-02, -1.0000e+09,
        -3.7658e-02, -3.7658e-02, -1.0000e+09, -3.7658e-02, -3.7658e-02,
        -1.0000e+09, -3.7658e-02, -3.7658e-02, -1.0000e+09, -3.7658e-02,
        -3.7658e-02, -1.0000e+09, -3.7658e-02, -3.7658e-02, -1.0000e+09,
        -3.7658e-02, -3.7658e-02, -1.0000e+09, -3.7658e-02, -3.7658e-02,
        -1.0000e+09, -3.7658e-02

Tokens embeddings:
tensor([-1.0346e-01, -4.6178e-01, -1.0000e+09, -8.9630e-02, -4.1832e-01,
        -1.0000e+09, -1.2216e-01, -4.1971e-01, -1.0000e+09, -1.2252e-01,
        -2.1932e-01, -1.0000e+09, -1.1936e-01, -3.2639e-01, -1.0000e+09,
        -9.0660e-02, -2.2722e-01, -1.0000e+09, -8.0235e-02, -2.9854e-01,
        -1.0000e+09, -1.3470e-01, -2.1412e-01, -1.0000e+09, -4.0188e-03,
        -5.9550e-02, -1.3126e-01, -2.1756e-02, -2.4856e-01, -1.0000e+09,
        -1.6998e-01, -2.6069e-01, -1.0000e+09, -6.9521e-02, -3.5735e-01,
        -1.0000e+09, -1.4696e-01, -3.4281e-01, -1.0000e+09, -5.1311e-02,
        -1.1236e-01, -1.0000e+09, -4.0138e-02, -3.7658e-02, -1.0000e+09,
        -3.7658e-02, -3.7658e-02, -1.0000e+09, -3.7658e-02, -3.7658e-02,
        -1.0000e+09, -3.7658e-02, -3.7658e-02, -1.0000e+09, -3.7658e-02,
        -3.7658e-02, -1.0000e+09, -3.7658e-02, -3.7658e-02, -1.0000e+09,
        -3.7658e-02, -3.7658e-02, -1.0000e+09, -3.7658e-02, -3.7658e-02,
        -1.0000e+09, -3.7658e-02

Tokens embeddings:
tensor([-1.0346e-01, -4.6178e-01, -1.0000e+09, -8.9630e-02, -4.1832e-01,
        -1.0000e+09, -1.2216e-01, -4.1971e-01, -1.0000e+09, -1.2252e-01,
        -2.1932e-01, -1.0000e+09, -1.1936e-01, -3.2639e-01, -1.0000e+09,
        -9.0660e-02, -2.2722e-01, -1.0000e+09, -8.0235e-02, -2.9854e-01,
        -1.0000e+09, -1.3470e-01, -2.1412e-01, -1.0000e+09, -4.0188e-03,
        -5.9550e-02, -1.3126e-01, -2.1756e-02, -2.4856e-01, -1.0000e+09,
        -1.6998e-01, -2.6069e-01, -1.0000e+09, -6.9521e-02, -3.5735e-01,
        -1.0000e+09, -1.4696e-01, -3.4281e-01, -1.0000e+09, -5.1311e-02,
        -1.1236e-01, -1.0000e+09, -4.0138e-02, -3.7658e-02, -1.0000e+09,
        -3.7658e-02, -3.7658e-02, -1.0000e+09, -3.7658e-02, -3.7658e-02,
        -1.0000e+09, -3.7658e-02, -3.7658e-02, -1.0000e+09, -3.7658e-02,
        -3.7658e-02, -1.0000e+09, -3.7658e-02, -3.7658e-02, -1.0000e+09,
        -3.7658e-02, -3.7658e-02, -1.0000e+09, -3.7658e-02, -3.7658e-02,
        -1.0000e+09, -3.7658e-02

Tokens embeddings:
tensor([-1.0346e-01, -4.6178e-01, -1.0000e+09, -8.9630e-02, -4.1832e-01,
        -1.0000e+09, -1.2216e-01, -4.1971e-01, -1.0000e+09, -1.2252e-01,
        -2.1932e-01, -1.0000e+09, -1.1936e-01, -3.2639e-01, -1.0000e+09,
        -9.0660e-02, -2.2722e-01, -1.0000e+09, -8.0235e-02, -2.9854e-01,
        -1.0000e+09, -1.3470e-01, -2.1412e-01, -1.0000e+09, -4.0188e-03,
        -5.9550e-02, -1.3126e-01, -2.1756e-02, -2.4856e-01, -1.0000e+09,
        -1.6998e-01, -2.6069e-01, -1.0000e+09, -6.9521e-02, -3.5735e-01,
        -1.0000e+09, -1.4696e-01, -3.4281e-01, -1.0000e+09, -5.1311e-02,
        -1.1236e-01, -1.0000e+09, -4.0138e-02, -3.7658e-02, -1.0000e+09,
        -3.7658e-02, -3.7658e-02, -1.0000e+09, -3.7658e-02, -3.7658e-02,
        -1.0000e+09, -3.7658e-02, -3.7658e-02, -1.0000e+09, -3.7658e-02,
        -3.7658e-02, -1.0000e+09, -3.7658e-02, -3.7658e-02, -1.0000e+09,
        -3.7658e-02, -3.7658e-02, -1.0000e+09, -3.7658e-02, -3.7658e-02,
        -1.0000e+09, -3.7658e-02

Tokens embeddings:
tensor([-1.0346e-01, -4.6178e-01, -1.0000e+09, -8.9630e-02, -4.1832e-01,
        -1.0000e+09, -1.2216e-01, -4.1971e-01, -1.0000e+09, -1.2252e-01,
        -2.1932e-01, -1.0000e+09, -1.1936e-01, -3.2639e-01, -1.0000e+09,
        -9.0660e-02, -2.2722e-01, -1.0000e+09, -8.0235e-02, -2.9854e-01,
        -1.0000e+09, -1.3470e-01, -2.1412e-01, -1.0000e+09, -4.0188e-03,
        -5.9550e-02, -1.3126e-01, -2.1756e-02, -2.4856e-01, -1.0000e+09,
        -1.6998e-01, -2.6069e-01, -1.0000e+09, -6.9521e-02, -3.5735e-01,
        -1.0000e+09, -1.4696e-01, -3.4281e-01, -1.0000e+09, -5.1311e-02,
        -1.1236e-01, -1.0000e+09, -4.0138e-02, -3.7658e-02, -1.0000e+09,
        -3.7658e-02, -3.7658e-02, -1.0000e+09, -3.7658e-02, -3.7658e-02,
        -1.0000e+09, -3.7658e-02, -3.7658e-02, -1.0000e+09, -3.7658e-02,
        -3.7658e-02, -1.0000e+09, -3.7658e-02, -3.7658e-02, -1.0000e+09,
        -3.7658e-02, -3.7658e-02, -1.0000e+09, -3.7658e-02, -3.7658e-02,
        -1.0000e+09, -3.7658e-02

Tokens embeddings:
tensor([-1.0346e-01, -4.6178e-01, -1.0000e+09, -8.9630e-02, -4.1832e-01,
        -1.0000e+09, -1.2216e-01, -4.1971e-01, -1.0000e+09, -1.2252e-01,
        -2.1932e-01, -1.0000e+09, -1.1936e-01, -3.2639e-01, -1.0000e+09,
        -9.0660e-02, -2.2722e-01, -1.0000e+09, -8.0235e-02, -2.9854e-01,
        -1.0000e+09, -1.3470e-01, -2.1412e-01, -1.0000e+09, -4.0188e-03,
        -5.9550e-02, -1.3126e-01, -2.1756e-02, -2.4856e-01, -1.0000e+09,
        -1.6998e-01, -2.6069e-01, -1.0000e+09, -6.9521e-02, -3.5735e-01,
        -1.0000e+09, -1.4696e-01, -3.4281e-01, -1.0000e+09, -5.1311e-02,
        -1.1236e-01, -1.0000e+09, -4.0138e-02, -3.7658e-02, -1.0000e+09,
        -3.7658e-02, -3.7658e-02, -1.0000e+09, -3.7658e-02, -3.7658e-02,
        -1.0000e+09, -3.7658e-02, -3.7658e-02, -1.0000e+09, -3.7658e-02,
        -3.7658e-02, -1.0000e+09, -3.7658e-02, -3.7658e-02, -1.0000e+09,
        -3.7658e-02, -3.7658e-02, -1.0000e+09, -3.7658e-02, -3.7658e-02,
        -1.0000e+09, -3.7658e-02

Tokens embeddings:
tensor([-1.0346e-01, -4.6178e-01, -1.0000e+09, -8.9630e-02, -4.1832e-01,
        -1.0000e+09, -1.2216e-01, -4.1971e-01, -1.0000e+09, -1.2252e-01,
        -2.1932e-01, -1.0000e+09, -1.1936e-01, -3.2639e-01, -1.0000e+09,
        -9.0660e-02, -2.2722e-01, -1.0000e+09, -8.0235e-02, -2.9854e-01,
        -1.0000e+09, -1.3470e-01, -2.1412e-01, -1.0000e+09, -4.0188e-03,
        -5.9550e-02, -1.3126e-01, -2.1756e-02, -2.4856e-01, -1.0000e+09,
        -1.6998e-01, -2.6069e-01, -1.0000e+09, -6.9521e-02, -3.5735e-01,
        -1.0000e+09, -1.4696e-01, -3.4281e-01, -1.0000e+09, -5.1311e-02,
        -1.1236e-01, -1.0000e+09, -4.0138e-02, -3.7658e-02, -1.0000e+09,
        -3.7658e-02, -3.7658e-02, -1.0000e+09, -3.7658e-02, -3.7658e-02,
        -1.0000e+09, -3.7658e-02, -3.7658e-02, -1.0000e+09, -3.7658e-02,
        -3.7658e-02, -1.0000e+09, -3.7658e-02, -3.7658e-02, -1.0000e+09,
        -3.7658e-02, -3.7658e-02, -1.0000e+09, -3.7658e-02, -3.7658e-02,
        -1.0000e+09, -3.7658e-02

Tokens embeddings:
tensor([-1.0346e-01, -4.6178e-01, -1.0000e+09, -8.9630e-02, -4.1832e-01,
        -1.0000e+09, -1.2216e-01, -4.1971e-01, -1.0000e+09, -1.2252e-01,
        -2.1932e-01, -1.0000e+09, -1.1936e-01, -3.2639e-01, -1.0000e+09,
        -9.0660e-02, -2.2722e-01, -1.0000e+09, -8.0235e-02, -2.9854e-01,
        -1.0000e+09, -1.3470e-01, -2.1412e-01, -1.0000e+09, -4.0188e-03,
        -5.9550e-02, -1.3126e-01, -2.1756e-02, -2.4856e-01, -1.0000e+09,
        -1.6998e-01, -2.6069e-01, -1.0000e+09, -6.9521e-02, -3.5735e-01,
        -1.0000e+09, -1.4696e-01, -3.4281e-01, -1.0000e+09, -5.1311e-02,
        -1.1236e-01, -1.0000e+09, -4.0138e-02, -3.7658e-02, -1.0000e+09,
        -3.7658e-02, -3.7658e-02, -1.0000e+09, -3.7658e-02, -3.7658e-02,
        -1.0000e+09, -3.7658e-02, -3.7658e-02, -1.0000e+09, -3.7658e-02,
        -3.7658e-02, -1.0000e+09, -3.7658e-02, -3.7658e-02, -1.0000e+09,
        -3.7658e-02, -3.7658e-02, -1.0000e+09, -3.7658e-02, -3.7658e-02,
        -1.0000e+09, -3.7658e-02

Tokens embeddings:
tensor([-1.0346e-01, -4.6178e-01, -1.0000e+09, -8.9630e-02, -4.1832e-01,
        -1.0000e+09, -1.2216e-01, -4.1971e-01, -1.0000e+09, -1.2252e-01,
        -2.1932e-01, -1.0000e+09, -1.1936e-01, -3.2639e-01, -1.0000e+09,
        -9.0660e-02, -2.2722e-01, -1.0000e+09, -8.0235e-02, -2.9854e-01,
        -1.0000e+09, -1.3470e-01, -2.1412e-01, -1.0000e+09, -4.0188e-03,
        -5.9550e-02, -1.3126e-01, -2.1756e-02, -2.4856e-01, -1.0000e+09,
        -1.6998e-01, -2.6069e-01, -1.0000e+09, -6.9521e-02, -3.5735e-01,
        -1.0000e+09, -1.4696e-01, -3.4281e-01, -1.0000e+09, -5.1311e-02,
        -1.1236e-01, -1.0000e+09, -4.0138e-02, -3.7658e-02, -1.0000e+09,
        -3.7658e-02, -3.7658e-02, -1.0000e+09, -3.7658e-02, -3.7658e-02,
        -1.0000e+09, -3.7658e-02, -3.7658e-02, -1.0000e+09, -3.7658e-02,
        -3.7658e-02, -1.0000e+09, -3.7658e-02, -3.7658e-02, -1.0000e+09,
        -3.7658e-02, -3.7658e-02, -1.0000e+09, -3.7658e-02, -3.7658e-02,
        -1.0000e+09, -3.7658e-02

Tokens embeddings:
tensor([-1.0346e-01, -4.6178e-01, -1.0000e+09, -8.9630e-02, -4.1832e-01,
        -1.0000e+09, -1.2216e-01, -4.1971e-01, -1.0000e+09, -1.2252e-01,
        -2.1932e-01, -1.0000e+09, -1.1936e-01, -3.2639e-01, -1.0000e+09,
        -9.0660e-02, -2.2722e-01, -1.0000e+09, -8.0235e-02, -2.9854e-01,
        -1.0000e+09, -1.3470e-01, -2.1412e-01, -1.0000e+09, -4.0188e-03,
        -5.9550e-02, -1.3126e-01, -2.1756e-02, -2.4856e-01, -1.0000e+09,
        -1.6998e-01, -2.6069e-01, -1.0000e+09, -6.9521e-02, -3.5735e-01,
        -1.0000e+09, -1.4696e-01, -3.4281e-01, -1.0000e+09, -5.1311e-02,
        -1.1236e-01, -1.0000e+09, -4.0138e-02, -3.7658e-02, -1.0000e+09,
        -3.7658e-02, -3.7658e-02, -1.0000e+09, -3.7658e-02, -3.7658e-02,
        -1.0000e+09, -3.7658e-02, -3.7658e-02, -1.0000e+09, -3.7658e-02,
        -3.7658e-02, -1.0000e+09, -3.7658e-02, -3.7658e-02, -1.0000e+09,
        -3.7658e-02, -3.7658e-02, -1.0000e+09, -3.7658e-02, -3.7658e-02,
        -1.0000e+09, -3.7658e-02

Tokens embeddings:
tensor([-1.0346e-01, -4.6178e-01, -1.0000e+09, -8.9630e-02, -4.1832e-01,
        -1.0000e+09, -1.2216e-01, -4.1971e-01, -1.0000e+09, -1.2252e-01,
        -2.1932e-01, -1.0000e+09, -1.1936e-01, -3.2639e-01, -1.0000e+09,
        -9.0660e-02, -2.2722e-01, -1.0000e+09, -8.0235e-02, -2.9854e-01,
        -1.0000e+09, -1.3470e-01, -2.1412e-01, -1.0000e+09, -4.0188e-03,
        -5.9550e-02, -1.3126e-01, -2.1756e-02, -2.4856e-01, -1.0000e+09,
        -1.6998e-01, -2.6069e-01, -1.0000e+09, -6.9521e-02, -3.5735e-01,
        -1.0000e+09, -1.4696e-01, -3.4281e-01, -1.0000e+09, -5.1311e-02,
        -1.1236e-01, -1.0000e+09, -4.0138e-02, -3.7658e-02, -1.0000e+09,
        -3.7658e-02, -3.7658e-02, -1.0000e+09, -3.7658e-02, -3.7658e-02,
        -1.0000e+09, -3.7658e-02, -3.7658e-02, -1.0000e+09, -3.7658e-02,
        -3.7658e-02, -1.0000e+09, -3.7658e-02, -3.7658e-02, -1.0000e+09,
        -3.7658e-02, -3.7658e-02, -1.0000e+09, -3.7658e-02, -3.7658e-02,
        -1.0000e+09, -3.7658e-02

Tokens embeddings:
tensor([-1.0346e-01, -3.9005e-01, -8.5644e-02, -1.4842e-01, -1.0000e+09,
        -2.1756e-02, -2.2534e-01, -9.1441e-02, -1.3126e-01, -1.0000e+09,
        -1.9730e-01, -3.8635e-01, -7.8284e-02, -1.3126e-01, -1.0000e+09,
        -9.3270e-02,  1.8556e-01, -1.7587e-01, -1.7587e-01, -1.0000e+09,
        -3.7658e-02, -3.7658e-02, -1.6941e-01, -3.7658e-02, -1.0000e+09,
        -3.7658e-02, -3.7658e-02, -1.6941e-01, -3.7658e-02, -1.0000e+09,
        -3.7658e-02, -3.7658e-02, -3.7658e-02, -3.7658e-02, -1.0000e+09,
        -3.7658e-02, -3.7658e-02, -1.6941e-01, -3.7658e-02, -1.0000e+09,
        -3.7658e-02, -3.7658e-02, -1.6941e-01, -3.7658e-02, -1.0000e+09,
        -3.7658e-02, -3.7658e-02, -1.6941e-01, -3.7658e-02, -1.0000e+09,
        -3.7658e-02, -3.7658e-02, -1.6941e-01, -3.7658e-02, -1.0000e+09,
        -3.7658e-02, -3.7658e-02, -1.6941e-01, -3.7658e-02, -1.0000e+09,
        -3.7658e-02, -3.7658e-02, -1.6941e-01, -3.7658e-02, -1.0000e+09,
        -3.7658e-02, -3.7658e-02

Tokens embeddings:
tensor([-1.0346e-01, -3.9005e-01, -8.5644e-02, -1.4842e-01, -1.0000e+09,
        -2.1756e-02, -2.2534e-01, -9.1441e-02, -1.3126e-01, -1.0000e+09,
        -1.9730e-01, -3.8635e-01, -7.8284e-02, -1.3126e-01, -1.0000e+09,
        -9.3270e-02,  1.8556e-01, -1.7587e-01, -1.7587e-01, -1.0000e+09,
        -3.7658e-02, -3.7658e-02, -1.6941e-01, -3.7658e-02, -1.0000e+09,
        -3.7658e-02, -3.7658e-02, -1.6941e-01, -3.7658e-02, -1.0000e+09,
        -3.7658e-02, -3.7658e-02, -3.7658e-02, -3.7658e-02, -1.0000e+09,
        -3.7658e-02, -3.7658e-02, -1.6941e-01, -3.7658e-02, -1.0000e+09,
        -3.7658e-02, -3.7658e-02, -1.6941e-01, -3.7658e-02, -1.0000e+09,
        -3.7658e-02, -3.7658e-02, -1.6941e-01, -3.7658e-02, -1.0000e+09,
        -3.7658e-02, -3.7658e-02, -1.6941e-01, -3.7658e-02, -1.0000e+09,
        -3.7658e-02, -3.7658e-02, -1.6941e-01, -3.7658e-02, -1.0000e+09,
        -3.7658e-02, -3.7658e-02, -1.6941e-01, -3.7658e-02, -1.0000e+09,
        -3.7658e-02, -3.7658e-02

Tokens embeddings:
tensor([-1.0346e-01, -3.9005e-01, -8.5644e-02, -1.4842e-01, -1.0000e+09,
        -2.1756e-02, -2.2534e-01, -9.1441e-02, -1.3126e-01, -1.0000e+09,
        -1.9730e-01, -3.8635e-01, -7.8284e-02, -1.3126e-01, -1.0000e+09,
        -9.3270e-02,  1.8556e-01, -1.7587e-01, -1.7587e-01, -1.0000e+09,
        -3.7658e-02, -3.7658e-02, -1.6941e-01, -3.7658e-02, -1.0000e+09,
        -3.7658e-02, -3.7658e-02, -1.6941e-01, -3.7658e-02, -1.0000e+09,
        -3.7658e-02, -3.7658e-02, -3.7658e-02, -3.7658e-02, -1.0000e+09,
        -3.7658e-02, -3.7658e-02, -1.6941e-01, -3.7658e-02, -1.0000e+09,
        -3.7658e-02, -3.7658e-02, -1.6941e-01, -3.7658e-02, -1.0000e+09,
        -3.7658e-02, -3.7658e-02, -1.6941e-01, -3.7658e-02, -1.0000e+09,
        -3.7658e-02, -3.7658e-02, -1.6941e-01, -3.7658e-02, -1.0000e+09,
        -3.7658e-02, -3.7658e-02, -1.6941e-01, -3.7658e-02, -1.0000e+09,
        -3.7658e-02, -3.7658e-02, -1.6941e-01, -3.7658e-02, -1.0000e+09,
        -3.7658e-02, -3.7658e-02

Tokens embeddings:
tensor([-1.0346e-01, -3.9005e-01, -8.5644e-02, -1.4842e-01, -1.0000e+09,
        -2.1756e-02, -2.2534e-01, -9.1441e-02, -1.3126e-01, -1.0000e+09,
        -1.9730e-01, -3.8635e-01, -7.8284e-02, -1.3126e-01, -1.0000e+09,
        -9.3270e-02,  1.8556e-01, -1.7587e-01, -1.7587e-01, -1.0000e+09,
        -3.7658e-02, -3.7658e-02, -1.6941e-01, -3.7658e-02, -1.0000e+09,
        -3.7658e-02, -3.7658e-02, -1.6941e-01, -3.7658e-02, -1.0000e+09,
        -3.7658e-02, -3.7658e-02, -3.7658e-02, -3.7658e-02, -1.0000e+09,
        -3.7658e-02, -3.7658e-02, -1.6941e-01, -3.7658e-02, -1.0000e+09,
        -3.7658e-02, -3.7658e-02, -1.6941e-01, -3.7658e-02, -1.0000e+09,
        -3.7658e-02, -3.7658e-02, -1.6941e-01, -3.7658e-02, -1.0000e+09,
        -3.7658e-02, -3.7658e-02, -1.6941e-01, -3.7658e-02, -1.0000e+09,
        -3.7658e-02, -3.7658e-02, -1.6941e-01, -3.7658e-02, -1.0000e+09,
        -3.7658e-02, -3.7658e-02, -1.6941e-01, -3.7658e-02, -1.0000e+09,
        -3.7658e-02, -3.7658e-02

Tokens embeddings:
tensor([-1.0346e-01, -3.9005e-01, -8.5644e-02, -1.4842e-01, -1.0000e+09,
        -2.1756e-02, -2.2534e-01, -9.1441e-02, -1.3126e-01, -1.0000e+09,
        -1.9730e-01, -3.8635e-01, -7.8284e-02, -1.3126e-01, -1.0000e+09,
        -9.3270e-02,  1.8556e-01, -1.7587e-01, -1.7587e-01, -1.0000e+09,
        -3.7658e-02, -3.7658e-02, -1.6941e-01, -3.7658e-02, -1.0000e+09,
        -3.7658e-02, -3.7658e-02, -1.6941e-01, -3.7658e-02, -1.0000e+09,
        -3.7658e-02, -3.7658e-02, -3.7658e-02, -3.7658e-02, -1.0000e+09,
        -3.7658e-02, -3.7658e-02, -1.6941e-01, -3.7658e-02, -1.0000e+09,
        -3.7658e-02, -3.7658e-02, -1.6941e-01, -3.7658e-02, -1.0000e+09,
        -3.7658e-02, -3.7658e-02, -1.6941e-01, -3.7658e-02, -1.0000e+09,
        -3.7658e-02, -3.7658e-02, -1.6941e-01, -3.7658e-02, -1.0000e+09,
        -3.7658e-02, -3.7658e-02, -1.6941e-01, -3.7658e-02, -1.0000e+09,
        -3.7658e-02, -3.7658e-02, -1.6941e-01, -3.7658e-02, -1.0000e+09,
        -3.7658e-02, -3.7658e-02

Tokens embeddings:
tensor([-1.0346e-01, -3.9005e-01, -8.5644e-02, -1.4842e-01, -1.0000e+09,
        -2.1756e-02, -2.2534e-01, -9.1441e-02, -1.3126e-01, -1.0000e+09,
        -1.9730e-01, -3.8635e-01, -7.8284e-02, -1.3126e-01, -1.0000e+09,
        -9.3270e-02,  1.8556e-01, -1.7587e-01, -1.7587e-01, -1.0000e+09,
        -3.7658e-02, -3.7658e-02, -1.6941e-01, -3.7658e-02, -1.0000e+09,
        -3.7658e-02, -3.7658e-02, -1.6941e-01, -3.7658e-02, -1.0000e+09,
        -3.7658e-02, -3.7658e-02, -3.7658e-02, -3.7658e-02, -1.0000e+09,
        -3.7658e-02, -3.7658e-02, -1.6941e-01, -3.7658e-02, -1.0000e+09,
        -3.7658e-02, -3.7658e-02, -1.6941e-01, -3.7658e-02, -1.0000e+09,
        -3.7658e-02, -3.7658e-02, -1.6941e-01, -3.7658e-02, -1.0000e+09,
        -3.7658e-02, -3.7658e-02, -1.6941e-01, -3.7658e-02, -1.0000e+09,
        -3.7658e-02, -3.7658e-02, -1.6941e-01, -3.7658e-02, -1.0000e+09,
        -3.7658e-02, -3.7658e-02, -1.6941e-01, -3.7658e-02, -1.0000e+09,
        -3.7658e-02, -3.7658e-02

Tokens embeddings:
tensor([-1.0346e-01, -3.9005e-01, -8.5644e-02, -1.4842e-01, -1.0000e+09,
        -2.1756e-02, -2.2534e-01, -9.1441e-02, -1.3126e-01, -1.0000e+09,
        -1.9730e-01, -3.8635e-01, -7.8284e-02, -1.3126e-01, -1.0000e+09,
        -9.3270e-02,  1.8556e-01, -1.7587e-01, -1.7587e-01, -1.0000e+09,
        -3.7658e-02, -3.7658e-02, -1.6941e-01, -3.7658e-02, -1.0000e+09,
        -3.7658e-02, -3.7658e-02, -1.6941e-01, -3.7658e-02, -1.0000e+09,
        -3.7658e-02, -3.7658e-02, -3.7658e-02, -3.7658e-02, -1.0000e+09,
        -3.7658e-02, -3.7658e-02, -1.6941e-01, -3.7658e-02, -1.0000e+09,
        -3.7658e-02, -3.7658e-02, -1.6941e-01, -3.7658e-02, -1.0000e+09,
        -3.7658e-02, -3.7658e-02, -1.6941e-01, -3.7658e-02, -1.0000e+09,
        -3.7658e-02, -3.7658e-02, -1.6941e-01, -3.7658e-02, -1.0000e+09,
        -3.7658e-02, -3.7658e-02, -1.6941e-01, -3.7658e-02, -1.0000e+09,
        -3.7658e-02, -3.7658e-02, -1.6941e-01, -3.7658e-02, -1.0000e+09,
        -3.7658e-02, -3.7658e-02

Tokens embeddings:
tensor([-1.0346e-01, -3.9005e-01, -8.5644e-02, -1.4842e-01, -1.0000e+09,
        -2.1756e-02, -2.2534e-01, -9.1441e-02, -1.3126e-01, -1.0000e+09,
        -1.9730e-01, -3.8635e-01, -7.8284e-02, -1.3126e-01, -1.0000e+09,
        -9.3270e-02,  1.8556e-01, -1.7587e-01, -1.7587e-01, -1.0000e+09,
        -3.7658e-02, -3.7658e-02, -1.6941e-01, -3.7658e-02, -1.0000e+09,
        -3.7658e-02, -3.7658e-02, -1.6941e-01, -3.7658e-02, -1.0000e+09,
        -3.7658e-02, -3.7658e-02, -3.7658e-02, -3.7658e-02, -1.0000e+09,
        -3.7658e-02, -3.7658e-02, -1.6941e-01, -3.7658e-02, -1.0000e+09,
        -3.7658e-02, -3.7658e-02, -1.6941e-01, -3.7658e-02, -1.0000e+09,
        -3.7658e-02, -3.7658e-02, -1.6941e-01, -3.7658e-02, -1.0000e+09,
        -3.7658e-02, -3.7658e-02, -1.6941e-01, -3.7658e-02, -1.0000e+09,
        -3.7658e-02, -3.7658e-02, -1.6941e-01, -3.7658e-02, -1.0000e+09,
        -3.7658e-02, -3.7658e-02, -1.6941e-01, -3.7658e-02, -1.0000e+09,
        -3.7658e-02, -3.7658e-02

Tokens embeddings:
tensor([-1.0346e-01, -3.9005e-01, -8.5644e-02, -1.4842e-01, -1.0000e+09,
        -2.1756e-02, -2.2534e-01, -9.1441e-02, -1.3126e-01, -1.0000e+09,
        -1.9730e-01, -3.8635e-01, -7.8284e-02, -1.3126e-01, -1.0000e+09,
        -9.3270e-02,  1.8556e-01, -1.7587e-01, -1.7587e-01, -1.0000e+09,
        -3.7658e-02, -3.7658e-02, -1.6941e-01, -3.7658e-02, -1.0000e+09,
        -3.7658e-02, -3.7658e-02, -1.6941e-01, -3.7658e-02, -1.0000e+09,
        -3.7658e-02, -3.7658e-02, -3.7658e-02, -3.7658e-02, -1.0000e+09,
        -3.7658e-02, -3.7658e-02, -1.6941e-01, -3.7658e-02, -1.0000e+09,
        -3.7658e-02, -3.7658e-02, -1.6941e-01, -3.7658e-02, -1.0000e+09,
        -3.7658e-02, -3.7658e-02, -1.6941e-01, -3.7658e-02, -1.0000e+09,
        -3.7658e-02, -3.7658e-02, -1.6941e-01, -3.7658e-02, -1.0000e+09,
        -3.7658e-02, -3.7658e-02, -1.6941e-01, -3.7658e-02, -1.0000e+09,
        -3.7658e-02, -3.7658e-02, -1.6941e-01, -3.7658e-02, -1.0000e+09,
        -3.7658e-02, -3.7658e-02

Tokens embeddings:
tensor([-1.0346e-01, -3.9005e-01, -8.5644e-02, -1.4842e-01, -1.0000e+09,
        -2.1756e-02, -2.2534e-01, -9.1441e-02, -1.3126e-01, -1.0000e+09,
        -1.9730e-01, -3.8635e-01, -7.8284e-02, -1.3126e-01, -1.0000e+09,
        -9.3270e-02,  1.8556e-01, -1.7587e-01, -1.7587e-01, -1.0000e+09,
        -3.7658e-02, -3.7658e-02, -1.6941e-01, -3.7658e-02, -1.0000e+09,
        -3.7658e-02, -3.7658e-02, -1.6941e-01, -3.7658e-02, -1.0000e+09,
        -3.7658e-02, -3.7658e-02, -3.7658e-02, -3.7658e-02, -1.0000e+09,
        -3.7658e-02, -3.7658e-02, -1.6941e-01, -3.7658e-02, -1.0000e+09,
        -3.7658e-02, -3.7658e-02, -1.6941e-01, -3.7658e-02, -1.0000e+09,
        -3.7658e-02, -3.7658e-02, -1.6941e-01, -3.7658e-02, -1.0000e+09,
        -3.7658e-02, -3.7658e-02, -1.6941e-01, -3.7658e-02, -1.0000e+09,
        -3.7658e-02, -3.7658e-02, -1.6941e-01, -3.7658e-02, -1.0000e+09,
        -3.7658e-02, -3.7658e-02, -1.6941e-01, -3.7658e-02, -1.0000e+09,
        -3.7658e-02, -3.7658e-02

Tokens embeddings:
tensor([-1.0346e-01, -3.9005e-01, -8.5644e-02, -1.4842e-01, -1.0000e+09,
        -2.1756e-02, -2.2534e-01, -9.1441e-02, -1.3126e-01, -1.0000e+09,
        -1.9730e-01, -3.8635e-01, -7.8284e-02, -1.3126e-01, -1.0000e+09,
        -9.3270e-02,  1.8556e-01, -1.7587e-01, -1.7587e-01, -1.0000e+09,
        -3.7658e-02, -3.7658e-02, -1.6941e-01, -3.7658e-02, -1.0000e+09,
        -3.7658e-02, -3.7658e-02, -1.6941e-01, -3.7658e-02, -1.0000e+09,
        -3.7658e-02, -3.7658e-02, -3.7658e-02, -3.7658e-02, -1.0000e+09,
        -3.7658e-02, -3.7658e-02, -1.6941e-01, -3.7658e-02, -1.0000e+09,
        -3.7658e-02, -3.7658e-02, -1.6941e-01, -3.7658e-02, -1.0000e+09,
        -3.7658e-02, -3.7658e-02, -1.6941e-01, -3.7658e-02, -1.0000e+09,
        -3.7658e-02, -3.7658e-02, -1.6941e-01, -3.7658e-02, -1.0000e+09,
        -3.7658e-02, -3.7658e-02, -1.6941e-01, -3.7658e-02, -1.0000e+09,
        -3.7658e-02, -3.7658e-02, -1.6941e-01, -3.7658e-02, -1.0000e+09,
        -3.7658e-02, -3.7658e-02

Tokens embeddings:
tensor([-1.0346e-01, -3.9005e-01, -8.5644e-02, -1.4842e-01, -1.0000e+09,
        -2.1756e-02, -2.2534e-01, -9.1441e-02, -1.3126e-01, -1.0000e+09,
        -1.9730e-01, -3.8635e-01, -7.8284e-02, -1.3126e-01, -1.0000e+09,
        -9.3270e-02,  1.8556e-01, -1.7587e-01, -1.7587e-01, -1.0000e+09,
        -3.7658e-02, -3.7658e-02, -1.6941e-01, -3.7658e-02, -1.0000e+09,
        -3.7658e-02, -3.7658e-02, -1.6941e-01, -3.7658e-02, -1.0000e+09,
        -3.7658e-02, -3.7658e-02, -3.7658e-02, -3.7658e-02, -1.0000e+09,
        -3.7658e-02, -3.7658e-02, -1.6941e-01, -3.7658e-02, -1.0000e+09,
        -3.7658e-02, -3.7658e-02, -1.6941e-01, -3.7658e-02, -1.0000e+09,
        -3.7658e-02, -3.7658e-02, -1.6941e-01, -3.7658e-02, -1.0000e+09,
        -3.7658e-02, -3.7658e-02, -1.6941e-01, -3.7658e-02, -1.0000e+09,
        -3.7658e-02, -3.7658e-02, -1.6941e-01, -3.7658e-02, -1.0000e+09,
        -3.7658e-02, -3.7658e-02, -1.6941e-01, -3.7658e-02, -1.0000e+09,
        -3.7658e-02, -3.7658e-02

Tokens embeddings:
tensor([-1.0346e-01, -2.6236e-01, -1.0000e+09, -4.0188e-03, -3.8573e-01,
        -1.0000e+09,  1.1517e-01, -3.7498e-01, -1.0000e+09, -9.7549e-02,
        -4.7965e-01, -1.0000e+09, -1.4911e-01, -2.3636e-01, -1.0000e+09,
         1.2951e-01, -2.3416e-01, -1.0000e+09, -1.0713e-01, -3.2237e-01,
        -1.0000e+09, -9.5430e-02, -2.4856e-01, -1.0000e+09, -4.0188e-03,
        -1.7413e-01, -1.0000e+09, -1.3044e-01, -2.9419e-01, -1.0000e+09,
        -9.5430e-02, -4.1832e-01, -1.0000e+09,  1.8556e-01, -3.0998e-01,
        -1.0000e+09, -4.0138e-02, -3.7658e-02, -3.7658e-02, -3.7658e-02,
        -3.7658e-02, -1.0000e+09, -3.7658e-02, -3.7658e-02, -1.0000e+09,
        -3.7658e-02, -3.7658e-02, -3.7658e-02, -3.7658e-02, -3.7658e-02,
        -3.7658e-02, -3.7658e-02, -3.7658e-02, -1.0000e+09, -3.7658e-02,
        -3.7658e-02, -1.0000e+09, -3.7658e-02, -3.7658e-02, -1.0000e+09,
        -3.7658e-02, -3.7658e-02, -1.0000e+09, -3.7658e-02, -3.7658e-02,
        -1.0000e+09, -3.7658e-02

Tokens embeddings:
tensor([-1.0346e-01, -2.6236e-01, -1.0000e+09, -4.0188e-03, -3.8573e-01,
        -1.0000e+09,  1.1517e-01, -3.7498e-01, -1.0000e+09, -9.7549e-02,
        -4.7965e-01, -1.0000e+09, -1.4911e-01, -2.3636e-01, -1.0000e+09,
         1.2951e-01, -2.3416e-01, -1.0000e+09, -1.0713e-01, -3.2237e-01,
        -1.0000e+09, -9.5430e-02, -2.4856e-01, -1.0000e+09, -4.0188e-03,
        -1.7413e-01, -1.0000e+09, -1.3044e-01, -2.9419e-01, -1.0000e+09,
        -9.5430e-02, -4.1832e-01, -1.0000e+09,  1.8556e-01, -3.0998e-01,
        -1.0000e+09, -4.0138e-02, -3.7658e-02, -3.7658e-02, -3.7658e-02,
        -3.7658e-02, -1.0000e+09, -3.7658e-02, -3.7658e-02, -1.0000e+09,
        -3.7658e-02, -3.7658e-02, -3.7658e-02, -3.7658e-02, -3.7658e-02,
        -3.7658e-02, -3.7658e-02, -3.7658e-02, -1.0000e+09, -3.7658e-02,
        -3.7658e-02, -1.0000e+09, -3.7658e-02, -3.7658e-02, -1.0000e+09,
        -3.7658e-02, -3.7658e-02, -1.0000e+09, -3.7658e-02, -3.7658e-02,
        -1.0000e+09, -3.7658e-02

Tokens embeddings:
tensor([-1.0346e-01, -2.6236e-01, -1.0000e+09, -4.0188e-03, -3.8573e-01,
        -1.0000e+09,  1.1517e-01, -3.7498e-01, -1.0000e+09, -9.7549e-02,
        -4.7965e-01, -1.0000e+09, -1.4911e-01, -2.3636e-01, -1.0000e+09,
         1.2951e-01, -2.3416e-01, -1.0000e+09, -1.0713e-01, -3.2237e-01,
        -1.0000e+09, -9.5430e-02, -2.4856e-01, -1.0000e+09, -4.0188e-03,
        -1.7413e-01, -1.0000e+09, -1.3044e-01, -2.9419e-01, -1.0000e+09,
        -9.5430e-02, -4.1832e-01, -1.0000e+09,  1.8556e-01, -3.0998e-01,
        -1.0000e+09, -4.0138e-02, -3.7658e-02, -3.7658e-02, -3.7658e-02,
        -3.7658e-02, -1.0000e+09, -3.7658e-02, -3.7658e-02, -1.0000e+09,
        -3.7658e-02, -3.7658e-02, -3.7658e-02, -3.7658e-02, -3.7658e-02,
        -3.7658e-02, -3.7658e-02, -3.7658e-02, -1.0000e+09, -3.7658e-02,
        -3.7658e-02, -1.0000e+09, -3.7658e-02, -3.7658e-02, -1.0000e+09,
        -3.7658e-02, -3.7658e-02, -1.0000e+09, -3.7658e-02, -3.7658e-02,
        -1.0000e+09, -3.7658e-02

Tokens embeddings:
tensor([-1.0346e-01, -2.6236e-01, -1.0000e+09, -4.0188e-03, -3.8573e-01,
        -1.0000e+09,  1.1517e-01, -3.7498e-01, -1.0000e+09, -9.7549e-02,
        -4.7965e-01, -1.0000e+09, -1.4911e-01, -2.3636e-01, -1.0000e+09,
         1.2951e-01, -2.3416e-01, -1.0000e+09, -1.0713e-01, -3.2237e-01,
        -1.0000e+09, -9.5430e-02, -2.4856e-01, -1.0000e+09, -4.0188e-03,
        -1.7413e-01, -1.0000e+09, -1.3044e-01, -2.9419e-01, -1.0000e+09,
        -9.5430e-02, -4.1832e-01, -1.0000e+09,  1.8556e-01, -3.0998e-01,
        -1.0000e+09, -4.0138e-02, -3.7658e-02, -3.7658e-02, -3.7658e-02,
        -3.7658e-02, -1.0000e+09, -3.7658e-02, -3.7658e-02, -1.0000e+09,
        -3.7658e-02, -3.7658e-02, -3.7658e-02, -3.7658e-02, -3.7658e-02,
        -3.7658e-02, -3.7658e-02, -3.7658e-02, -1.0000e+09, -3.7658e-02,
        -3.7658e-02, -1.0000e+09, -3.7658e-02, -3.7658e-02, -1.0000e+09,
        -3.7658e-02, -3.7658e-02, -1.0000e+09, -3.7658e-02, -3.7658e-02,
        -1.0000e+09, -3.7658e-02

Tokens embeddings:
tensor([-1.0346e-01, -2.6236e-01, -1.0000e+09, -4.0188e-03, -3.8573e-01,
        -1.0000e+09,  1.1517e-01, -3.7498e-01, -1.0000e+09, -9.7549e-02,
        -4.7965e-01, -1.0000e+09, -1.4911e-01, -2.3636e-01, -1.0000e+09,
         1.2951e-01, -2.3416e-01, -1.0000e+09, -1.0713e-01, -3.2237e-01,
        -1.0000e+09, -9.5430e-02, -2.4856e-01, -1.0000e+09, -4.0188e-03,
        -1.7413e-01, -1.0000e+09, -1.3044e-01, -2.9419e-01, -1.0000e+09,
        -9.5430e-02, -4.1832e-01, -1.0000e+09,  1.8556e-01, -3.0998e-01,
        -1.0000e+09, -4.0138e-02, -3.7658e-02, -3.7658e-02, -3.7658e-02,
        -3.7658e-02, -1.0000e+09, -3.7658e-02, -3.7658e-02, -1.0000e+09,
        -3.7658e-02, -3.7658e-02, -3.7658e-02, -3.7658e-02, -3.7658e-02,
        -3.7658e-02, -3.7658e-02, -3.7658e-02, -1.0000e+09, -3.7658e-02,
        -3.7658e-02, -1.0000e+09, -3.7658e-02, -3.7658e-02, -1.0000e+09,
        -3.7658e-02, -3.7658e-02, -1.0000e+09, -3.7658e-02, -3.7658e-02,
        -1.0000e+09, -3.7658e-02

Tokens embeddings:
tensor([-1.0346e-01, -2.6236e-01, -1.0000e+09, -4.0188e-03, -3.8573e-01,
        -1.0000e+09,  1.1517e-01, -3.7498e-01, -1.0000e+09, -9.7549e-02,
        -4.7965e-01, -1.0000e+09, -1.4911e-01, -2.3636e-01, -1.0000e+09,
         1.2951e-01, -2.3416e-01, -1.0000e+09, -1.0713e-01, -3.2237e-01,
        -1.0000e+09, -9.5430e-02, -2.4856e-01, -1.0000e+09, -4.0188e-03,
        -1.7413e-01, -1.0000e+09, -1.3044e-01, -2.9419e-01, -1.0000e+09,
        -9.5430e-02, -4.1832e-01, -1.0000e+09,  1.8556e-01, -3.0998e-01,
        -1.0000e+09, -4.0138e-02, -3.7658e-02, -3.7658e-02, -3.7658e-02,
        -3.7658e-02, -1.0000e+09, -3.7658e-02, -3.7658e-02, -1.0000e+09,
        -3.7658e-02, -3.7658e-02, -3.7658e-02, -3.7658e-02, -3.7658e-02,
        -3.7658e-02, -3.7658e-02, -3.7658e-02, -1.0000e+09, -3.7658e-02,
        -3.7658e-02, -1.0000e+09, -3.7658e-02, -3.7658e-02, -1.0000e+09,
        -3.7658e-02, -3.7658e-02, -1.0000e+09, -3.7658e-02, -3.7658e-02,
        -1.0000e+09, -3.7658e-02

Tokens embeddings:
tensor([-1.0346e-01, -2.6236e-01, -1.0000e+09, -4.0188e-03, -3.8573e-01,
        -1.0000e+09,  1.1517e-01, -3.7498e-01, -1.0000e+09, -9.7549e-02,
        -4.7965e-01, -1.0000e+09, -1.4911e-01, -2.3636e-01, -1.0000e+09,
         1.2951e-01, -2.3416e-01, -1.0000e+09, -1.0713e-01, -3.2237e-01,
        -1.0000e+09, -9.5430e-02, -2.4856e-01, -1.0000e+09, -4.0188e-03,
        -1.7413e-01, -1.0000e+09, -1.3044e-01, -2.9419e-01, -1.0000e+09,
        -9.5430e-02, -4.1832e-01, -1.0000e+09,  1.8556e-01, -3.0998e-01,
        -1.0000e+09, -4.0138e-02, -3.7658e-02, -3.7658e-02, -3.7658e-02,
        -3.7658e-02, -1.0000e+09, -3.7658e-02, -3.7658e-02, -1.0000e+09,
        -3.7658e-02, -3.7658e-02, -3.7658e-02, -3.7658e-02, -3.7658e-02,
        -3.7658e-02, -3.7658e-02, -3.7658e-02, -1.0000e+09, -3.7658e-02,
        -3.7658e-02, -1.0000e+09, -3.7658e-02, -3.7658e-02, -1.0000e+09,
        -3.7658e-02, -3.7658e-02, -1.0000e+09, -3.7658e-02, -3.7658e-02,
        -1.0000e+09, -3.7658e-02

Tokens embeddings:
tensor([-1.0346e-01, -2.6236e-01, -1.0000e+09, -4.0188e-03, -3.8573e-01,
        -1.0000e+09,  1.1517e-01, -3.7498e-01, -1.0000e+09, -9.7549e-02,
        -4.7965e-01, -1.0000e+09, -1.4911e-01, -2.3636e-01, -1.0000e+09,
         1.2951e-01, -2.3416e-01, -1.0000e+09, -1.0713e-01, -3.2237e-01,
        -1.0000e+09, -9.5430e-02, -2.4856e-01, -1.0000e+09, -4.0188e-03,
        -1.7413e-01, -1.0000e+09, -1.3044e-01, -2.9419e-01, -1.0000e+09,
        -9.5430e-02, -4.1832e-01, -1.0000e+09,  1.8556e-01, -3.0998e-01,
        -1.0000e+09, -4.0138e-02, -3.7658e-02, -3.7658e-02, -3.7658e-02,
        -3.7658e-02, -1.0000e+09, -3.7658e-02, -3.7658e-02, -1.0000e+09,
        -3.7658e-02, -3.7658e-02, -3.7658e-02, -3.7658e-02, -3.7658e-02,
        -3.7658e-02, -3.7658e-02, -3.7658e-02, -1.0000e+09, -3.7658e-02,
        -3.7658e-02, -1.0000e+09, -3.7658e-02, -3.7658e-02, -1.0000e+09,
        -3.7658e-02, -3.7658e-02, -1.0000e+09, -3.7658e-02, -3.7658e-02,
        -1.0000e+09, -3.7658e-02

Tokens embeddings:
tensor([-1.0346e-01, -2.6236e-01, -1.0000e+09, -4.0188e-03, -3.8573e-01,
        -1.0000e+09,  1.1517e-01, -3.7498e-01, -1.0000e+09, -9.7549e-02,
        -4.7965e-01, -1.0000e+09, -1.4911e-01, -2.3636e-01, -1.0000e+09,
         1.2951e-01, -2.3416e-01, -1.0000e+09, -1.0713e-01, -3.2237e-01,
        -1.0000e+09, -9.5430e-02, -2.4856e-01, -1.0000e+09, -4.0188e-03,
        -1.7413e-01, -1.0000e+09, -1.3044e-01, -2.9419e-01, -1.0000e+09,
        -9.5430e-02, -4.1832e-01, -1.0000e+09,  1.8556e-01, -3.0998e-01,
        -1.0000e+09, -4.0138e-02, -3.7658e-02, -3.7658e-02, -3.7658e-02,
        -3.7658e-02, -1.0000e+09, -3.7658e-02, -3.7658e-02, -1.0000e+09,
        -3.7658e-02, -3.7658e-02, -3.7658e-02, -3.7658e-02, -3.7658e-02,
        -3.7658e-02, -3.7658e-02, -3.7658e-02, -1.0000e+09, -3.7658e-02,
        -3.7658e-02, -1.0000e+09, -3.7658e-02, -3.7658e-02, -1.0000e+09,
        -3.7658e-02, -3.7658e-02, -1.0000e+09, -3.7658e-02, -3.7658e-02,
        -1.0000e+09, -3.7658e-02

Tokens embeddings:
tensor([-1.0346e-01, -2.6236e-01, -1.0000e+09, -4.0188e-03, -3.8573e-01,
        -1.0000e+09,  1.1517e-01, -3.7498e-01, -1.0000e+09, -9.7549e-02,
        -4.7965e-01, -1.0000e+09, -1.4911e-01, -2.3636e-01, -1.0000e+09,
         1.2951e-01, -2.3416e-01, -1.0000e+09, -1.0713e-01, -3.2237e-01,
        -1.0000e+09, -9.5430e-02, -2.4856e-01, -1.0000e+09, -4.0188e-03,
        -1.7413e-01, -1.0000e+09, -1.3044e-01, -2.9419e-01, -1.0000e+09,
        -9.5430e-02, -4.1832e-01, -1.0000e+09,  1.8556e-01, -3.0998e-01,
        -1.0000e+09, -4.0138e-02, -3.7658e-02, -3.7658e-02, -3.7658e-02,
        -3.7658e-02, -1.0000e+09, -3.7658e-02, -3.7658e-02, -1.0000e+09,
        -3.7658e-02, -3.7658e-02, -3.7658e-02, -3.7658e-02, -3.7658e-02,
        -3.7658e-02, -3.7658e-02, -3.7658e-02, -1.0000e+09, -3.7658e-02,
        -3.7658e-02, -1.0000e+09, -3.7658e-02, -3.7658e-02, -1.0000e+09,
        -3.7658e-02, -3.7658e-02, -1.0000e+09, -3.7658e-02, -3.7658e-02,
        -1.0000e+09, -3.7658e-02

Tokens embeddings:
tensor([-1.0346e-01, -2.6236e-01, -1.0000e+09, -4.0188e-03, -3.8573e-01,
        -1.0000e+09,  1.1517e-01, -3.7498e-01, -1.0000e+09, -9.7549e-02,
        -4.7965e-01, -1.0000e+09, -1.4911e-01, -2.3636e-01, -1.0000e+09,
         1.2951e-01, -2.3416e-01, -1.0000e+09, -1.0713e-01, -3.2237e-01,
        -1.0000e+09, -9.5430e-02, -2.4856e-01, -1.0000e+09, -4.0188e-03,
        -1.7413e-01, -1.0000e+09, -1.3044e-01, -2.9419e-01, -1.0000e+09,
        -9.5430e-02, -4.1832e-01, -1.0000e+09,  1.8556e-01, -3.0998e-01,
        -1.0000e+09, -4.0138e-02, -3.7658e-02, -3.7658e-02, -3.7658e-02,
        -3.7658e-02, -1.0000e+09, -3.7658e-02, -3.7658e-02, -1.0000e+09,
        -3.7658e-02, -3.7658e-02, -3.7658e-02, -3.7658e-02, -3.7658e-02,
        -3.7658e-02, -3.7658e-02, -3.7658e-02, -1.0000e+09, -3.7658e-02,
        -3.7658e-02, -1.0000e+09, -3.7658e-02, -3.7658e-02, -1.0000e+09,
        -3.7658e-02, -3.7658e-02, -1.0000e+09, -3.7658e-02, -3.7658e-02,
        -1.0000e+09, -3.7658e-02

Tokens embeddings:
tensor([-1.0346e-01, -2.6236e-01, -1.0000e+09, -4.0188e-03, -3.8573e-01,
        -1.0000e+09,  1.1517e-01, -3.7498e-01, -1.0000e+09, -9.7549e-02,
        -4.7965e-01, -1.0000e+09, -1.4911e-01, -2.3636e-01, -1.0000e+09,
         1.2951e-01, -2.3416e-01, -1.0000e+09, -1.0713e-01, -3.2237e-01,
        -1.0000e+09, -9.5430e-02, -2.4856e-01, -1.0000e+09, -4.0188e-03,
        -1.7413e-01, -1.0000e+09, -1.3044e-01, -2.9419e-01, -1.0000e+09,
        -9.5430e-02, -4.1832e-01, -1.0000e+09,  1.8556e-01, -3.0998e-01,
        -1.0000e+09, -4.0138e-02, -3.7658e-02, -3.7658e-02, -3.7658e-02,
        -3.7658e-02, -1.0000e+09, -3.7658e-02, -3.7658e-02, -1.0000e+09,
        -3.7658e-02, -3.7658e-02, -3.7658e-02, -3.7658e-02, -3.7658e-02,
        -3.7658e-02, -3.7658e-02, -3.7658e-02, -1.0000e+09, -3.7658e-02,
        -3.7658e-02, -1.0000e+09, -3.7658e-02, -3.7658e-02, -1.0000e+09,
        -3.7658e-02, -3.7658e-02, -1.0000e+09, -3.7658e-02, -3.7658e-02,
        -1.0000e+09, -3.7658e-02

Tokens embeddings:
tensor([-1.0346e-01, -4.1832e-01, -1.3057e-01, -5.9233e-02, -1.0000e+09,
        -4.0188e-03, -4.9375e-01, -1.0322e-01, -1.0777e-01, -1.0000e+09,
        -2.1756e-02, -6.0994e-02, -1.1020e-01, -1.1790e-01, -1.0000e+09,
        -1.7587e-01, -3.0998e-01, -5.6775e-02, -1.0500e-01, -1.0190e-01,
        -1.6022e-01, -3.0696e-01, -5.3774e-02, -1.2943e-01, -1.0000e+09,
        -8.9630e-02, -4.2399e-01, -1.0114e-01, -2.1756e-02, -1.0000e+09,
        -1.3126e-01, -3.7589e-01, -1.5864e-01, -1.6049e-01, -1.0000e+09,
        -1.1343e-01, -6.8695e-02, -1.5484e-01, -1.7121e-01, -1.0000e+09,
        -5.3774e-02, -3.0088e-01, -2.5756e-02, -4.0138e-02, -1.0000e+09,
        -3.7658e-02, -3.7658e-02, -1.6941e-01, -3.7658e-02, -1.0000e+09,
        -3.7658e-02, -3.7658e-02, -1.6941e-01, -3.7658e-02, -1.0000e+09,
        -3.7658e-02, -3.7658e-02, -1.6941e-01, -3.7658e-02, -1.0000e+09,
        -3.7658e-02, -3.7658e-02, -1.6941e-01, -3.7658e-02, -1.0000e+09,
        -3.7658e-02, -3.7658e-02

Tokens embeddings:
tensor([-1.0346e-01, -4.1832e-01, -1.3057e-01, -5.9233e-02, -1.0000e+09,
        -4.0188e-03, -4.9375e-01, -1.0322e-01, -1.0777e-01, -1.0000e+09,
        -2.1756e-02, -6.0994e-02, -1.1020e-01, -1.1790e-01, -1.0000e+09,
        -1.7587e-01, -3.0998e-01, -5.6775e-02, -1.0500e-01, -1.0190e-01,
        -1.6022e-01, -3.0696e-01, -5.3774e-02, -1.2943e-01, -1.0000e+09,
        -8.9630e-02, -4.2399e-01, -1.0114e-01, -2.1756e-02, -1.0000e+09,
        -1.3126e-01, -3.7589e-01, -1.5864e-01, -1.6049e-01, -1.0000e+09,
        -1.1343e-01, -6.8695e-02, -1.5484e-01, -1.7121e-01, -1.0000e+09,
        -5.3774e-02, -3.0088e-01, -2.5756e-02, -4.0138e-02, -1.0000e+09,
        -3.7658e-02, -3.7658e-02, -1.6941e-01, -3.7658e-02, -1.0000e+09,
        -3.7658e-02, -3.7658e-02, -1.6941e-01, -3.7658e-02, -1.0000e+09,
        -3.7658e-02, -3.7658e-02, -1.6941e-01, -3.7658e-02, -1.0000e+09,
        -3.7658e-02, -3.7658e-02, -1.6941e-01, -3.7658e-02, -1.0000e+09,
        -3.7658e-02, -3.7658e-02

Tokens embeddings:
tensor([-1.0346e-01, -4.1832e-01, -1.3057e-01, -5.9233e-02, -1.0000e+09,
        -4.0188e-03, -4.9375e-01, -1.0322e-01, -1.0777e-01, -1.0000e+09,
        -2.1756e-02, -6.0994e-02, -1.1020e-01, -1.1790e-01, -1.0000e+09,
        -1.7587e-01, -3.0998e-01, -5.6775e-02, -1.0500e-01, -1.0190e-01,
        -1.6022e-01, -3.0696e-01, -5.3774e-02, -1.2943e-01, -1.0000e+09,
        -8.9630e-02, -4.2399e-01, -1.0114e-01, -2.1756e-02, -1.0000e+09,
        -1.3126e-01, -3.7589e-01, -1.5864e-01, -1.6049e-01, -1.0000e+09,
        -1.1343e-01, -6.8695e-02, -1.5484e-01, -1.7121e-01, -1.0000e+09,
        -5.3774e-02, -3.0088e-01, -2.5756e-02, -4.0138e-02, -1.0000e+09,
        -3.7658e-02, -3.7658e-02, -1.6941e-01, -3.7658e-02, -1.0000e+09,
        -3.7658e-02, -3.7658e-02, -1.6941e-01, -3.7658e-02, -1.0000e+09,
        -3.7658e-02, -3.7658e-02, -1.6941e-01, -3.7658e-02, -1.0000e+09,
        -3.7658e-02, -3.7658e-02, -1.6941e-01, -3.7658e-02, -1.0000e+09,
        -3.7658e-02, -3.7658e-02

Tokens embeddings:
tensor([-1.0346e-01, -4.1832e-01, -1.3057e-01, -5.9233e-02, -1.0000e+09,
        -4.0188e-03, -4.9375e-01, -1.0322e-01, -1.0777e-01, -1.0000e+09,
        -2.1756e-02, -6.0994e-02, -1.1020e-01, -1.1790e-01, -1.0000e+09,
        -1.7587e-01, -3.0998e-01, -5.6775e-02, -1.0500e-01, -1.0190e-01,
        -1.6022e-01, -3.0696e-01, -5.3774e-02, -1.2943e-01, -1.0000e+09,
        -8.9630e-02, -4.2399e-01, -1.0114e-01, -2.1756e-02, -1.0000e+09,
        -1.3126e-01, -3.7589e-01, -1.5864e-01, -1.6049e-01, -1.0000e+09,
        -1.1343e-01, -6.8695e-02, -1.5484e-01, -1.7121e-01, -1.0000e+09,
        -5.3774e-02, -3.0088e-01, -2.5756e-02, -4.0138e-02, -1.0000e+09,
        -3.7658e-02, -3.7658e-02, -1.6941e-01, -3.7658e-02, -1.0000e+09,
        -3.7658e-02, -3.7658e-02, -1.6941e-01, -3.7658e-02, -1.0000e+09,
        -3.7658e-02, -3.7658e-02, -1.6941e-01, -3.7658e-02, -1.0000e+09,
        -3.7658e-02, -3.7658e-02, -1.6941e-01, -3.7658e-02, -1.0000e+09,
        -3.7658e-02, -3.7658e-02

Tokens embeddings:
tensor([-1.0346e-01, -4.1832e-01, -1.3057e-01, -5.9233e-02, -1.0000e+09,
        -4.0188e-03, -4.9375e-01, -1.0322e-01, -1.0777e-01, -1.0000e+09,
        -2.1756e-02, -6.0994e-02, -1.1020e-01, -1.1790e-01, -1.0000e+09,
        -1.7587e-01, -3.0998e-01, -5.6775e-02, -1.0500e-01, -1.0190e-01,
        -1.6022e-01, -3.0696e-01, -5.3774e-02, -1.2943e-01, -1.0000e+09,
        -8.9630e-02, -4.2399e-01, -1.0114e-01, -2.1756e-02, -1.0000e+09,
        -1.3126e-01, -3.7589e-01, -1.5864e-01, -1.6049e-01, -1.0000e+09,
        -1.1343e-01, -6.8695e-02, -1.5484e-01, -1.7121e-01, -1.0000e+09,
        -5.3774e-02, -3.0088e-01, -2.5756e-02, -4.0138e-02, -1.0000e+09,
        -3.7658e-02, -3.7658e-02, -1.6941e-01, -3.7658e-02, -1.0000e+09,
        -3.7658e-02, -3.7658e-02, -1.6941e-01, -3.7658e-02, -1.0000e+09,
        -3.7658e-02, -3.7658e-02, -1.6941e-01, -3.7658e-02, -1.0000e+09,
        -3.7658e-02, -3.7658e-02, -1.6941e-01, -3.7658e-02, -1.0000e+09,
        -3.7658e-02, -3.7658e-02

Tokens embeddings:
tensor([-1.0346e-01, -4.1832e-01, -1.3057e-01, -5.9233e-02, -1.0000e+09,
        -4.0188e-03, -4.9375e-01, -1.0322e-01, -1.0777e-01, -1.0000e+09,
        -2.1756e-02, -6.0994e-02, -1.1020e-01, -1.1790e-01, -1.0000e+09,
        -1.7587e-01, -3.0998e-01, -5.6775e-02, -1.0500e-01, -1.0190e-01,
        -1.6022e-01, -3.0696e-01, -5.3774e-02, -1.2943e-01, -1.0000e+09,
        -8.9630e-02, -4.2399e-01, -1.0114e-01, -2.1756e-02, -1.0000e+09,
        -1.3126e-01, -3.7589e-01, -1.5864e-01, -1.6049e-01, -1.0000e+09,
        -1.1343e-01, -6.8695e-02, -1.5484e-01, -1.7121e-01, -1.0000e+09,
        -5.3774e-02, -3.0088e-01, -2.5756e-02, -4.0138e-02, -1.0000e+09,
        -3.7658e-02, -3.7658e-02, -1.6941e-01, -3.7658e-02, -1.0000e+09,
        -3.7658e-02, -3.7658e-02, -1.6941e-01, -3.7658e-02, -1.0000e+09,
        -3.7658e-02, -3.7658e-02, -1.6941e-01, -3.7658e-02, -1.0000e+09,
        -3.7658e-02, -3.7658e-02, -1.6941e-01, -3.7658e-02, -1.0000e+09,
        -3.7658e-02, -3.7658e-02

Tokens embeddings:
tensor([-1.0346e-01, -4.1832e-01, -1.3057e-01, -5.9233e-02, -1.0000e+09,
        -4.0188e-03, -4.9375e-01, -1.0322e-01, -1.0777e-01, -1.0000e+09,
        -2.1756e-02, -6.0994e-02, -1.1020e-01, -1.1790e-01, -1.0000e+09,
        -1.7587e-01, -3.0998e-01, -5.6775e-02, -1.0500e-01, -1.0190e-01,
        -1.6022e-01, -3.0696e-01, -5.3774e-02, -1.2943e-01, -1.0000e+09,
        -8.9630e-02, -4.2399e-01, -1.0114e-01, -2.1756e-02, -1.0000e+09,
        -1.3126e-01, -3.7589e-01, -1.5864e-01, -1.6049e-01, -1.0000e+09,
        -1.1343e-01, -6.8695e-02, -1.5484e-01, -1.7121e-01, -1.0000e+09,
        -5.3774e-02, -3.0088e-01, -2.5756e-02, -4.0138e-02, -1.0000e+09,
        -3.7658e-02, -3.7658e-02, -1.6941e-01, -3.7658e-02, -1.0000e+09,
        -3.7658e-02, -3.7658e-02, -1.6941e-01, -3.7658e-02, -1.0000e+09,
        -3.7658e-02, -3.7658e-02, -1.6941e-01, -3.7658e-02, -1.0000e+09,
        -3.7658e-02, -3.7658e-02, -1.6941e-01, -3.7658e-02, -1.0000e+09,
        -3.7658e-02, -3.7658e-02

Tokens embeddings:
tensor([-1.0346e-01, -4.1832e-01, -1.3057e-01, -5.9233e-02, -1.0000e+09,
        -4.0188e-03, -4.9375e-01, -1.0322e-01, -1.0777e-01, -1.0000e+09,
        -2.1756e-02, -6.0994e-02, -1.1020e-01, -1.1790e-01, -1.0000e+09,
        -1.7587e-01, -3.0998e-01, -5.6775e-02, -1.0500e-01, -1.0190e-01,
        -1.6022e-01, -3.0696e-01, -5.3774e-02, -1.2943e-01, -1.0000e+09,
        -8.9630e-02, -4.2399e-01, -1.0114e-01, -2.1756e-02, -1.0000e+09,
        -1.3126e-01, -3.7589e-01, -1.5864e-01, -1.6049e-01, -1.0000e+09,
        -1.1343e-01, -6.8695e-02, -1.5484e-01, -1.7121e-01, -1.0000e+09,
        -5.3774e-02, -3.0088e-01, -2.5756e-02, -4.0138e-02, -1.0000e+09,
        -3.7658e-02, -3.7658e-02, -1.6941e-01, -3.7658e-02, -1.0000e+09,
        -3.7658e-02, -3.7658e-02, -1.6941e-01, -3.7658e-02, -1.0000e+09,
        -3.7658e-02, -3.7658e-02, -1.6941e-01, -3.7658e-02, -1.0000e+09,
        -3.7658e-02, -3.7658e-02, -1.6941e-01, -3.7658e-02, -1.0000e+09,
        -3.7658e-02, -3.7658e-02

Tokens embeddings:
tensor([-1.0346e-01, -4.1832e-01, -1.3057e-01, -5.9233e-02, -1.0000e+09,
        -4.0188e-03, -4.9375e-01, -1.0322e-01, -1.0777e-01, -1.0000e+09,
        -2.1756e-02, -6.0994e-02, -1.1020e-01, -1.1790e-01, -1.0000e+09,
        -1.7587e-01, -3.0998e-01, -5.6775e-02, -1.0500e-01, -1.0190e-01,
        -1.6022e-01, -3.0696e-01, -5.3774e-02, -1.2943e-01, -1.0000e+09,
        -8.9630e-02, -4.2399e-01, -1.0114e-01, -2.1756e-02, -1.0000e+09,
        -1.3126e-01, -3.7589e-01, -1.5864e-01, -1.6049e-01, -1.0000e+09,
        -1.1343e-01, -6.8695e-02, -1.5484e-01, -1.7121e-01, -1.0000e+09,
        -5.3774e-02, -3.0088e-01, -2.5756e-02, -4.0138e-02, -1.0000e+09,
        -3.7658e-02, -3.7658e-02, -1.6941e-01, -3.7658e-02, -1.0000e+09,
        -3.7658e-02, -3.7658e-02, -1.6941e-01, -3.7658e-02, -1.0000e+09,
        -3.7658e-02, -3.7658e-02, -1.6941e-01, -3.7658e-02, -1.0000e+09,
        -3.7658e-02, -3.7658e-02, -1.6941e-01, -3.7658e-02, -1.0000e+09,
        -3.7658e-02, -3.7658e-02

Tokens embeddings:
tensor([-1.0346e-01, -4.1832e-01, -1.3057e-01, -5.9233e-02, -1.0000e+09,
        -4.0188e-03, -4.9375e-01, -1.0322e-01, -1.0777e-01, -1.0000e+09,
        -2.1756e-02, -6.0994e-02, -1.1020e-01, -1.1790e-01, -1.0000e+09,
        -1.7587e-01, -3.0998e-01, -5.6775e-02, -1.0500e-01, -1.0190e-01,
        -1.6022e-01, -3.0696e-01, -5.3774e-02, -1.2943e-01, -1.0000e+09,
        -8.9630e-02, -4.2399e-01, -1.0114e-01, -2.1756e-02, -1.0000e+09,
        -1.3126e-01, -3.7589e-01, -1.5864e-01, -1.6049e-01, -1.0000e+09,
        -1.1343e-01, -6.8695e-02, -1.5484e-01, -1.7121e-01, -1.0000e+09,
        -5.3774e-02, -3.0088e-01, -2.5756e-02, -4.0138e-02, -1.0000e+09,
        -3.7658e-02, -3.7658e-02, -1.6941e-01, -3.7658e-02, -1.0000e+09,
        -3.7658e-02, -3.7658e-02, -1.6941e-01, -3.7658e-02, -1.0000e+09,
        -3.7658e-02, -3.7658e-02, -1.6941e-01, -3.7658e-02, -1.0000e+09,
        -3.7658e-02, -3.7658e-02, -1.6941e-01, -3.7658e-02, -1.0000e+09,
        -3.7658e-02, -3.7658e-02

Tokens embeddings:
tensor([-1.0346e-01, -4.1832e-01, -1.3057e-01, -5.9233e-02, -1.0000e+09,
        -4.0188e-03, -4.9375e-01, -1.0322e-01, -1.0777e-01, -1.0000e+09,
        -2.1756e-02, -6.0994e-02, -1.1020e-01, -1.1790e-01, -1.0000e+09,
        -1.7587e-01, -3.0998e-01, -5.6775e-02, -1.0500e-01, -1.0190e-01,
        -1.6022e-01, -3.0696e-01, -5.3774e-02, -1.2943e-01, -1.0000e+09,
        -8.9630e-02, -4.2399e-01, -1.0114e-01, -2.1756e-02, -1.0000e+09,
        -1.3126e-01, -3.7589e-01, -1.5864e-01, -1.6049e-01, -1.0000e+09,
        -1.1343e-01, -6.8695e-02, -1.5484e-01, -1.7121e-01, -1.0000e+09,
        -5.3774e-02, -3.0088e-01, -2.5756e-02, -4.0138e-02, -1.0000e+09,
        -3.7658e-02, -3.7658e-02, -1.6941e-01, -3.7658e-02, -1.0000e+09,
        -3.7658e-02, -3.7658e-02, -1.6941e-01, -3.7658e-02, -1.0000e+09,
        -3.7658e-02, -3.7658e-02, -1.6941e-01, -3.7658e-02, -1.0000e+09,
        -3.7658e-02, -3.7658e-02, -1.6941e-01, -3.7658e-02, -1.0000e+09,
        -3.7658e-02, -3.7658e-02

Tokens embeddings:
tensor([-1.0346e-01, -4.1832e-01, -1.3057e-01, -5.9233e-02, -1.0000e+09,
        -4.0188e-03, -4.9375e-01, -1.0322e-01, -1.0777e-01, -1.0000e+09,
        -2.1756e-02, -6.0994e-02, -1.1020e-01, -1.1790e-01, -1.0000e+09,
        -1.7587e-01, -3.0998e-01, -5.6775e-02, -1.0500e-01, -1.0190e-01,
        -1.6022e-01, -3.0696e-01, -5.3774e-02, -1.2943e-01, -1.0000e+09,
        -8.9630e-02, -4.2399e-01, -1.0114e-01, -2.1756e-02, -1.0000e+09,
        -1.3126e-01, -3.7589e-01, -1.5864e-01, -1.6049e-01, -1.0000e+09,
        -1.1343e-01, -6.8695e-02, -1.5484e-01, -1.7121e-01, -1.0000e+09,
        -5.3774e-02, -3.0088e-01, -2.5756e-02, -4.0138e-02, -1.0000e+09,
        -3.7658e-02, -3.7658e-02, -1.6941e-01, -3.7658e-02, -1.0000e+09,
        -3.7658e-02, -3.7658e-02, -1.6941e-01, -3.7658e-02, -1.0000e+09,
        -3.7658e-02, -3.7658e-02, -1.6941e-01, -3.7658e-02, -1.0000e+09,
        -3.7658e-02, -3.7658e-02, -1.6941e-01, -3.7658e-02, -1.0000e+09,
        -3.7658e-02, -3.7658e-02

Tokens embeddings:
tensor([-1.0346e-01, -4.9791e-01, -1.0000e+09, -9.6072e-02, -3.8338e-01,
        -1.0000e+09, -1.2092e-01, -5.9233e-02, -6.4865e-02, -1.6592e-01,
        -2.3416e-01, -1.0000e+09, -8.4709e-02,  1.4825e-01, -1.0000e+09,
         1.8556e-01, -3.7498e-01, -1.0000e+09, -5.1681e-02, -1.2965e-01,
        -1.0000e+09, -1.9158e-01,  8.7607e-02, -1.0000e+09,  1.8556e-01,
        -4.0138e-02, -1.0000e+09, -3.7658e-02, -3.7658e-02, -1.0000e+09,
        -3.7658e-02, -3.7658e-02, -3.7658e-02, -3.7658e-02, -3.7658e-02,
        -1.0000e+09, -3.7658e-02, -3.7658e-02, -1.0000e+09, -3.7658e-02,
        -3.7658e-02, -1.0000e+09, -3.7658e-02, -3.7658e-02, -1.0000e+09,
        -3.7658e-02, -3.7658e-02, -1.0000e+09, -3.7658e-02, -3.7658e-02,
        -1.0000e+09, -3.7658e-02, -3.7658e-02, -1.0000e+09, -3.7658e-02,
        -3.7658e-02, -1.0000e+09, -3.7658e-02, -3.7658e-02, -1.0000e+09,
        -3.7658e-02, -3.7658e-02, -1.0000e+09, -3.7658e-02, -3.7658e-02,
        -1.0000e+09, -3.7658e-02

Tokens embeddings:
tensor([-1.0346e-01, -4.9791e-01, -1.0000e+09, -9.6072e-02, -3.8338e-01,
        -1.0000e+09, -1.2092e-01, -5.9233e-02, -6.4865e-02, -1.6592e-01,
        -2.3416e-01, -1.0000e+09, -8.4709e-02,  1.4825e-01, -1.0000e+09,
         1.8556e-01, -3.7498e-01, -1.0000e+09, -5.1681e-02, -1.2965e-01,
        -1.0000e+09, -1.9158e-01,  8.7607e-02, -1.0000e+09,  1.8556e-01,
        -4.0138e-02, -1.0000e+09, -3.7658e-02, -3.7658e-02, -1.0000e+09,
        -3.7658e-02, -3.7658e-02, -3.7658e-02, -3.7658e-02, -3.7658e-02,
        -1.0000e+09, -3.7658e-02, -3.7658e-02, -1.0000e+09, -3.7658e-02,
        -3.7658e-02, -1.0000e+09, -3.7658e-02, -3.7658e-02, -1.0000e+09,
        -3.7658e-02, -3.7658e-02, -1.0000e+09, -3.7658e-02, -3.7658e-02,
        -1.0000e+09, -3.7658e-02, -3.7658e-02, -1.0000e+09, -3.7658e-02,
        -3.7658e-02, -1.0000e+09, -3.7658e-02, -3.7658e-02, -1.0000e+09,
        -3.7658e-02, -3.7658e-02, -1.0000e+09, -3.7658e-02, -3.7658e-02,
        -1.0000e+09, -3.7658e-02

Tokens embeddings:
tensor([-1.0346e-01, -4.9791e-01, -1.0000e+09, -9.6072e-02, -3.8338e-01,
        -1.0000e+09, -1.2092e-01, -5.9233e-02, -6.4865e-02, -1.6592e-01,
        -2.3416e-01, -1.0000e+09, -8.4709e-02,  1.4825e-01, -1.0000e+09,
         1.8556e-01, -3.7498e-01, -1.0000e+09, -5.1681e-02, -1.2965e-01,
        -1.0000e+09, -1.9158e-01,  8.7607e-02, -1.0000e+09,  1.8556e-01,
        -4.0138e-02, -1.0000e+09, -3.7658e-02, -3.7658e-02, -1.0000e+09,
        -3.7658e-02, -3.7658e-02, -3.7658e-02, -3.7658e-02, -3.7658e-02,
        -1.0000e+09, -3.7658e-02, -3.7658e-02, -1.0000e+09, -3.7658e-02,
        -3.7658e-02, -1.0000e+09, -3.7658e-02, -3.7658e-02, -1.0000e+09,
        -3.7658e-02, -3.7658e-02, -1.0000e+09, -3.7658e-02, -3.7658e-02,
        -1.0000e+09, -3.7658e-02, -3.7658e-02, -1.0000e+09, -3.7658e-02,
        -3.7658e-02, -1.0000e+09, -3.7658e-02, -3.7658e-02, -1.0000e+09,
        -3.7658e-02, -3.7658e-02, -1.0000e+09, -3.7658e-02, -3.7658e-02,
        -1.0000e+09, -3.7658e-02

Tokens embeddings:
tensor([-1.0346e-01, -4.9791e-01, -1.0000e+09, -9.6072e-02, -3.8338e-01,
        -1.0000e+09, -1.2092e-01, -5.9233e-02, -6.4865e-02, -1.6592e-01,
        -2.3416e-01, -1.0000e+09, -8.4709e-02,  1.4825e-01, -1.0000e+09,
         1.8556e-01, -3.7498e-01, -1.0000e+09, -5.1681e-02, -1.2965e-01,
        -1.0000e+09, -1.9158e-01,  8.7607e-02, -1.0000e+09,  1.8556e-01,
        -4.0138e-02, -1.0000e+09, -3.7658e-02, -3.7658e-02, -1.0000e+09,
        -3.7658e-02, -3.7658e-02, -3.7658e-02, -3.7658e-02, -3.7658e-02,
        -1.0000e+09, -3.7658e-02, -3.7658e-02, -1.0000e+09, -3.7658e-02,
        -3.7658e-02, -1.0000e+09, -3.7658e-02, -3.7658e-02, -1.0000e+09,
        -3.7658e-02, -3.7658e-02, -1.0000e+09, -3.7658e-02, -3.7658e-02,
        -1.0000e+09, -3.7658e-02, -3.7658e-02, -1.0000e+09, -3.7658e-02,
        -3.7658e-02, -1.0000e+09, -3.7658e-02, -3.7658e-02, -1.0000e+09,
        -3.7658e-02, -3.7658e-02, -1.0000e+09, -3.7658e-02, -3.7658e-02,
        -1.0000e+09, -3.7658e-02

Tokens embeddings:
tensor([-1.0346e-01, -4.9791e-01, -1.0000e+09, -9.6072e-02, -3.8338e-01,
        -1.0000e+09, -1.2092e-01, -5.9233e-02, -6.4865e-02, -1.6592e-01,
        -2.3416e-01, -1.0000e+09, -8.4709e-02,  1.4825e-01, -1.0000e+09,
         1.8556e-01, -3.7498e-01, -1.0000e+09, -5.1681e-02, -1.2965e-01,
        -1.0000e+09, -1.9158e-01,  8.7607e-02, -1.0000e+09,  1.8556e-01,
        -4.0138e-02, -1.0000e+09, -3.7658e-02, -3.7658e-02, -1.0000e+09,
        -3.7658e-02, -3.7658e-02, -3.7658e-02, -3.7658e-02, -3.7658e-02,
        -1.0000e+09, -3.7658e-02, -3.7658e-02, -1.0000e+09, -3.7658e-02,
        -3.7658e-02, -1.0000e+09, -3.7658e-02, -3.7658e-02, -1.0000e+09,
        -3.7658e-02, -3.7658e-02, -1.0000e+09, -3.7658e-02, -3.7658e-02,
        -1.0000e+09, -3.7658e-02, -3.7658e-02, -1.0000e+09, -3.7658e-02,
        -3.7658e-02, -1.0000e+09, -3.7658e-02, -3.7658e-02, -1.0000e+09,
        -3.7658e-02, -3.7658e-02, -1.0000e+09, -3.7658e-02, -3.7658e-02,
        -1.0000e+09, -3.7658e-02

Tokens embeddings:
tensor([-1.0346e-01, -4.9791e-01, -1.0000e+09, -9.6072e-02, -3.8338e-01,
        -1.0000e+09, -1.2092e-01, -5.9233e-02, -6.4865e-02, -1.6592e-01,
        -2.3416e-01, -1.0000e+09, -8.4709e-02,  1.4825e-01, -1.0000e+09,
         1.8556e-01, -3.7498e-01, -1.0000e+09, -5.1681e-02, -1.2965e-01,
        -1.0000e+09, -1.9158e-01,  8.7607e-02, -1.0000e+09,  1.8556e-01,
        -4.0138e-02, -1.0000e+09, -3.7658e-02, -3.7658e-02, -1.0000e+09,
        -3.7658e-02, -3.7658e-02, -3.7658e-02, -3.7658e-02, -3.7658e-02,
        -1.0000e+09, -3.7658e-02, -3.7658e-02, -1.0000e+09, -3.7658e-02,
        -3.7658e-02, -1.0000e+09, -3.7658e-02, -3.7658e-02, -1.0000e+09,
        -3.7658e-02, -3.7658e-02, -1.0000e+09, -3.7658e-02, -3.7658e-02,
        -1.0000e+09, -3.7658e-02, -3.7658e-02, -1.0000e+09, -3.7658e-02,
        -3.7658e-02, -1.0000e+09, -3.7658e-02, -3.7658e-02, -1.0000e+09,
        -3.7658e-02, -3.7658e-02, -1.0000e+09, -3.7658e-02, -3.7658e-02,
        -1.0000e+09, -3.7658e-02

Tokens embeddings:
tensor([-1.0346e-01, -4.9791e-01, -1.0000e+09, -9.6072e-02, -3.8338e-01,
        -1.0000e+09, -1.2092e-01, -5.9233e-02, -6.4865e-02, -1.6592e-01,
        -2.3416e-01, -1.0000e+09, -8.4709e-02,  1.4825e-01, -1.0000e+09,
         1.8556e-01, -3.7498e-01, -1.0000e+09, -5.1681e-02, -1.2965e-01,
        -1.0000e+09, -1.9158e-01,  8.7607e-02, -1.0000e+09,  1.8556e-01,
        -4.0138e-02, -1.0000e+09, -3.7658e-02, -3.7658e-02, -1.0000e+09,
        -3.7658e-02, -3.7658e-02, -3.7658e-02, -3.7658e-02, -3.7658e-02,
        -1.0000e+09, -3.7658e-02, -3.7658e-02, -1.0000e+09, -3.7658e-02,
        -3.7658e-02, -1.0000e+09, -3.7658e-02, -3.7658e-02, -1.0000e+09,
        -3.7658e-02, -3.7658e-02, -1.0000e+09, -3.7658e-02, -3.7658e-02,
        -1.0000e+09, -3.7658e-02, -3.7658e-02, -1.0000e+09, -3.7658e-02,
        -3.7658e-02, -1.0000e+09, -3.7658e-02, -3.7658e-02, -1.0000e+09,
        -3.7658e-02, -3.7658e-02, -1.0000e+09, -3.7658e-02, -3.7658e-02,
        -1.0000e+09, -3.7658e-02

Tokens embeddings:
tensor([-1.0346e-01, -4.9791e-01, -1.0000e+09, -9.6072e-02, -3.8338e-01,
        -1.0000e+09, -1.2092e-01, -5.9233e-02, -6.4865e-02, -1.6592e-01,
        -2.3416e-01, -1.0000e+09, -8.4709e-02,  1.4825e-01, -1.0000e+09,
         1.8556e-01, -3.7498e-01, -1.0000e+09, -5.1681e-02, -1.2965e-01,
        -1.0000e+09, -1.9158e-01,  8.7607e-02, -1.0000e+09,  1.8556e-01,
        -4.0138e-02, -1.0000e+09, -3.7658e-02, -3.7658e-02, -1.0000e+09,
        -3.7658e-02, -3.7658e-02, -3.7658e-02, -3.7658e-02, -3.7658e-02,
        -1.0000e+09, -3.7658e-02, -3.7658e-02, -1.0000e+09, -3.7658e-02,
        -3.7658e-02, -1.0000e+09, -3.7658e-02, -3.7658e-02, -1.0000e+09,
        -3.7658e-02, -3.7658e-02, -1.0000e+09, -3.7658e-02, -3.7658e-02,
        -1.0000e+09, -3.7658e-02, -3.7658e-02, -1.0000e+09, -3.7658e-02,
        -3.7658e-02, -1.0000e+09, -3.7658e-02, -3.7658e-02, -1.0000e+09,
        -3.7658e-02, -3.7658e-02, -1.0000e+09, -3.7658e-02, -3.7658e-02,
        -1.0000e+09, -3.7658e-02

Tokens embeddings:
tensor([-1.0346e-01, -4.9791e-01, -1.0000e+09, -9.6072e-02, -3.8338e-01,
        -1.0000e+09, -1.2092e-01, -5.9233e-02, -6.4865e-02, -1.6592e-01,
        -2.3416e-01, -1.0000e+09, -8.4709e-02,  1.4825e-01, -1.0000e+09,
         1.8556e-01, -3.7498e-01, -1.0000e+09, -5.1681e-02, -1.2965e-01,
        -1.0000e+09, -1.9158e-01,  8.7607e-02, -1.0000e+09,  1.8556e-01,
        -4.0138e-02, -1.0000e+09, -3.7658e-02, -3.7658e-02, -1.0000e+09,
        -3.7658e-02, -3.7658e-02, -3.7658e-02, -3.7658e-02, -3.7658e-02,
        -1.0000e+09, -3.7658e-02, -3.7658e-02, -1.0000e+09, -3.7658e-02,
        -3.7658e-02, -1.0000e+09, -3.7658e-02, -3.7658e-02, -1.0000e+09,
        -3.7658e-02, -3.7658e-02, -1.0000e+09, -3.7658e-02, -3.7658e-02,
        -1.0000e+09, -3.7658e-02, -3.7658e-02, -1.0000e+09, -3.7658e-02,
        -3.7658e-02, -1.0000e+09, -3.7658e-02, -3.7658e-02, -1.0000e+09,
        -3.7658e-02, -3.7658e-02, -1.0000e+09, -3.7658e-02, -3.7658e-02,
        -1.0000e+09, -3.7658e-02

Tokens embeddings:
tensor([-1.0346e-01, -4.9791e-01, -1.0000e+09, -9.6072e-02, -3.8338e-01,
        -1.0000e+09, -1.2092e-01, -5.9233e-02, -6.4865e-02, -1.6592e-01,
        -2.3416e-01, -1.0000e+09, -8.4709e-02,  1.4825e-01, -1.0000e+09,
         1.8556e-01, -3.7498e-01, -1.0000e+09, -5.1681e-02, -1.2965e-01,
        -1.0000e+09, -1.9158e-01,  8.7607e-02, -1.0000e+09,  1.8556e-01,
        -4.0138e-02, -1.0000e+09, -3.7658e-02, -3.7658e-02, -1.0000e+09,
        -3.7658e-02, -3.7658e-02, -3.7658e-02, -3.7658e-02, -3.7658e-02,
        -1.0000e+09, -3.7658e-02, -3.7658e-02, -1.0000e+09, -3.7658e-02,
        -3.7658e-02, -1.0000e+09, -3.7658e-02, -3.7658e-02, -1.0000e+09,
        -3.7658e-02, -3.7658e-02, -1.0000e+09, -3.7658e-02, -3.7658e-02,
        -1.0000e+09, -3.7658e-02, -3.7658e-02, -1.0000e+09, -3.7658e-02,
        -3.7658e-02, -1.0000e+09, -3.7658e-02, -3.7658e-02, -1.0000e+09,
        -3.7658e-02, -3.7658e-02, -1.0000e+09, -3.7658e-02, -3.7658e-02,
        -1.0000e+09, -3.7658e-02

Tokens embeddings:
tensor([-1.0346e-01, -4.9791e-01, -1.0000e+09, -9.6072e-02, -3.8338e-01,
        -1.0000e+09, -1.2092e-01, -5.9233e-02, -6.4865e-02, -1.6592e-01,
        -2.3416e-01, -1.0000e+09, -8.4709e-02,  1.4825e-01, -1.0000e+09,
         1.8556e-01, -3.7498e-01, -1.0000e+09, -5.1681e-02, -1.2965e-01,
        -1.0000e+09, -1.9158e-01,  8.7607e-02, -1.0000e+09,  1.8556e-01,
        -4.0138e-02, -1.0000e+09, -3.7658e-02, -3.7658e-02, -1.0000e+09,
        -3.7658e-02, -3.7658e-02, -3.7658e-02, -3.7658e-02, -3.7658e-02,
        -1.0000e+09, -3.7658e-02, -3.7658e-02, -1.0000e+09, -3.7658e-02,
        -3.7658e-02, -1.0000e+09, -3.7658e-02, -3.7658e-02, -1.0000e+09,
        -3.7658e-02, -3.7658e-02, -1.0000e+09, -3.7658e-02, -3.7658e-02,
        -1.0000e+09, -3.7658e-02, -3.7658e-02, -1.0000e+09, -3.7658e-02,
        -3.7658e-02, -1.0000e+09, -3.7658e-02, -3.7658e-02, -1.0000e+09,
        -3.7658e-02, -3.7658e-02, -1.0000e+09, -3.7658e-02, -3.7658e-02,
        -1.0000e+09, -3.7658e-02

Tokens embeddings:
tensor([-1.0346e-01, -4.9791e-01, -1.0000e+09, -9.6072e-02, -3.8338e-01,
        -1.0000e+09, -1.2092e-01, -5.9233e-02, -6.4865e-02, -1.6592e-01,
        -2.3416e-01, -1.0000e+09, -8.4709e-02,  1.4825e-01, -1.0000e+09,
         1.8556e-01, -3.7498e-01, -1.0000e+09, -5.1681e-02, -1.2965e-01,
        -1.0000e+09, -1.9158e-01,  8.7607e-02, -1.0000e+09,  1.8556e-01,
        -4.0138e-02, -1.0000e+09, -3.7658e-02, -3.7658e-02, -1.0000e+09,
        -3.7658e-02, -3.7658e-02, -3.7658e-02, -3.7658e-02, -3.7658e-02,
        -1.0000e+09, -3.7658e-02, -3.7658e-02, -1.0000e+09, -3.7658e-02,
        -3.7658e-02, -1.0000e+09, -3.7658e-02, -3.7658e-02, -1.0000e+09,
        -3.7658e-02, -3.7658e-02, -1.0000e+09, -3.7658e-02, -3.7658e-02,
        -1.0000e+09, -3.7658e-02, -3.7658e-02, -1.0000e+09, -3.7658e-02,
        -3.7658e-02, -1.0000e+09, -3.7658e-02, -3.7658e-02, -1.0000e+09,
        -3.7658e-02, -3.7658e-02, -1.0000e+09, -3.7658e-02, -3.7658e-02,
        -1.0000e+09, -3.7658e-02

Tokens embeddings:
tensor([-1.0346e-01, -4.2066e-01, -1.0000e+09, -1.6257e-01, -2.2173e-01,
        -1.0000e+09, -7.9537e-02, -2.9843e-01, -1.0000e+09, -1.2520e-01,
        -4.0874e-01, -1.0000e+09,  1.4015e-03, -1.6246e-01, -4.9402e-01,
        -2.1200e-01, -3.4513e-01, -1.0000e+09, -7.9537e-02, -4.3756e-01,
        -1.0000e+09, -8.0235e-02, -3.4693e-01, -1.0000e+09, -1.7709e-01,
        -4.3756e-01, -1.0000e+09, -1.1020e-01, -2.7950e-01, -1.0000e+09,
        -5.1681e-02, -3.7728e-01, -1.0000e+09, -4.0138e-02, -3.7658e-02,
        -3.7658e-02, -3.7658e-02, -3.7658e-02, -3.7658e-02, -3.7658e-02,
        -3.7658e-02, -1.0000e+09, -3.7658e-02, -3.7658e-02, -1.0000e+09,
        -3.7658e-02, -3.7658e-02, -1.0000e+09, -3.7658e-02, -3.7658e-02,
        -1.0000e+09, -3.7658e-02, -3.7658e-02, -1.0000e+09, -3.7658e-02,
        -3.7658e-02, -1.0000e+09, -3.7658e-02, -3.7658e-02, -1.0000e+09,
        -3.7658e-02, -3.7658e-02, -1.0000e+09, -3.7658e-02, -3.7658e-02,
        -1.0000e+09, -3.7658e-02

Tokens embeddings:
tensor([-1.0346e-01, -4.2066e-01, -1.0000e+09, -1.6257e-01, -2.2173e-01,
        -1.0000e+09, -7.9537e-02, -2.9843e-01, -1.0000e+09, -1.2520e-01,
        -4.0874e-01, -1.0000e+09,  1.4015e-03, -1.6246e-01, -4.9402e-01,
        -2.1200e-01, -3.4513e-01, -1.0000e+09, -7.9537e-02, -4.3756e-01,
        -1.0000e+09, -8.0235e-02, -3.4693e-01, -1.0000e+09, -1.7709e-01,
        -4.3756e-01, -1.0000e+09, -1.1020e-01, -2.7950e-01, -1.0000e+09,
        -5.1681e-02, -3.7728e-01, -1.0000e+09, -4.0138e-02, -3.7658e-02,
        -3.7658e-02, -3.7658e-02, -3.7658e-02, -3.7658e-02, -3.7658e-02,
        -3.7658e-02, -1.0000e+09, -3.7658e-02, -3.7658e-02, -1.0000e+09,
        -3.7658e-02, -3.7658e-02, -1.0000e+09, -3.7658e-02, -3.7658e-02,
        -1.0000e+09, -3.7658e-02, -3.7658e-02, -1.0000e+09, -3.7658e-02,
        -3.7658e-02, -1.0000e+09, -3.7658e-02, -3.7658e-02, -1.0000e+09,
        -3.7658e-02, -3.7658e-02, -1.0000e+09, -3.7658e-02, -3.7658e-02,
        -1.0000e+09, -3.7658e-02

Tokens embeddings:
tensor([-1.0346e-01, -4.2066e-01, -1.0000e+09, -1.6257e-01, -2.2173e-01,
        -1.0000e+09, -7.9537e-02, -2.9843e-01, -1.0000e+09, -1.2520e-01,
        -4.0874e-01, -1.0000e+09,  1.4015e-03, -1.6246e-01, -4.9402e-01,
        -2.1200e-01, -3.4513e-01, -1.0000e+09, -7.9537e-02, -4.3756e-01,
        -1.0000e+09, -8.0235e-02, -3.4693e-01, -1.0000e+09, -1.7709e-01,
        -4.3756e-01, -1.0000e+09, -1.1020e-01, -2.7950e-01, -1.0000e+09,
        -5.1681e-02, -3.7728e-01, -1.0000e+09, -4.0138e-02, -3.7658e-02,
        -3.7658e-02, -3.7658e-02, -3.7658e-02, -3.7658e-02, -3.7658e-02,
        -3.7658e-02, -1.0000e+09, -3.7658e-02, -3.7658e-02, -1.0000e+09,
        -3.7658e-02, -3.7658e-02, -1.0000e+09, -3.7658e-02, -3.7658e-02,
        -1.0000e+09, -3.7658e-02, -3.7658e-02, -1.0000e+09, -3.7658e-02,
        -3.7658e-02, -1.0000e+09, -3.7658e-02, -3.7658e-02, -1.0000e+09,
        -3.7658e-02, -3.7658e-02, -1.0000e+09, -3.7658e-02, -3.7658e-02,
        -1.0000e+09, -3.7658e-02

Tokens embeddings:
tensor([-1.0346e-01, -4.2066e-01, -1.0000e+09, -1.6257e-01, -2.2173e-01,
        -1.0000e+09, -7.9537e-02, -2.9843e-01, -1.0000e+09, -1.2520e-01,
        -4.0874e-01, -1.0000e+09,  1.4015e-03, -1.6246e-01, -4.9402e-01,
        -2.1200e-01, -3.4513e-01, -1.0000e+09, -7.9537e-02, -4.3756e-01,
        -1.0000e+09, -8.0235e-02, -3.4693e-01, -1.0000e+09, -1.7709e-01,
        -4.3756e-01, -1.0000e+09, -1.1020e-01, -2.7950e-01, -1.0000e+09,
        -5.1681e-02, -3.7728e-01, -1.0000e+09, -4.0138e-02, -3.7658e-02,
        -3.7658e-02, -3.7658e-02, -3.7658e-02, -3.7658e-02, -3.7658e-02,
        -3.7658e-02, -1.0000e+09, -3.7658e-02, -3.7658e-02, -1.0000e+09,
        -3.7658e-02, -3.7658e-02, -1.0000e+09, -3.7658e-02, -3.7658e-02,
        -1.0000e+09, -3.7658e-02, -3.7658e-02, -1.0000e+09, -3.7658e-02,
        -3.7658e-02, -1.0000e+09, -3.7658e-02, -3.7658e-02, -1.0000e+09,
        -3.7658e-02, -3.7658e-02, -1.0000e+09, -3.7658e-02, -3.7658e-02,
        -1.0000e+09, -3.7658e-02

Tokens embeddings:
tensor([-1.0346e-01, -4.2066e-01, -1.0000e+09, -1.6257e-01, -2.2173e-01,
        -1.0000e+09, -7.9537e-02, -2.9843e-01, -1.0000e+09, -1.2520e-01,
        -4.0874e-01, -1.0000e+09,  1.4015e-03, -1.6246e-01, -4.9402e-01,
        -2.1200e-01, -3.4513e-01, -1.0000e+09, -7.9537e-02, -4.3756e-01,
        -1.0000e+09, -8.0235e-02, -3.4693e-01, -1.0000e+09, -1.7709e-01,
        -4.3756e-01, -1.0000e+09, -1.1020e-01, -2.7950e-01, -1.0000e+09,
        -5.1681e-02, -3.7728e-01, -1.0000e+09, -4.0138e-02, -3.7658e-02,
        -3.7658e-02, -3.7658e-02, -3.7658e-02, -3.7658e-02, -3.7658e-02,
        -3.7658e-02, -1.0000e+09, -3.7658e-02, -3.7658e-02, -1.0000e+09,
        -3.7658e-02, -3.7658e-02, -1.0000e+09, -3.7658e-02, -3.7658e-02,
        -1.0000e+09, -3.7658e-02, -3.7658e-02, -1.0000e+09, -3.7658e-02,
        -3.7658e-02, -1.0000e+09, -3.7658e-02, -3.7658e-02, -1.0000e+09,
        -3.7658e-02, -3.7658e-02, -1.0000e+09, -3.7658e-02, -3.7658e-02,
        -1.0000e+09, -3.7658e-02

Tokens embeddings:
tensor([-1.0346e-01, -4.2066e-01, -1.0000e+09, -1.6257e-01, -2.2173e-01,
        -1.0000e+09, -7.9537e-02, -2.9843e-01, -1.0000e+09, -1.2520e-01,
        -4.0874e-01, -1.0000e+09,  1.4015e-03, -1.6246e-01, -4.9402e-01,
        -2.1200e-01, -3.4513e-01, -1.0000e+09, -7.9537e-02, -4.3756e-01,
        -1.0000e+09, -8.0235e-02, -3.4693e-01, -1.0000e+09, -1.7709e-01,
        -4.3756e-01, -1.0000e+09, -1.1020e-01, -2.7950e-01, -1.0000e+09,
        -5.1681e-02, -3.7728e-01, -1.0000e+09, -4.0138e-02, -3.7658e-02,
        -3.7658e-02, -3.7658e-02, -3.7658e-02, -3.7658e-02, -3.7658e-02,
        -3.7658e-02, -1.0000e+09, -3.7658e-02, -3.7658e-02, -1.0000e+09,
        -3.7658e-02, -3.7658e-02, -1.0000e+09, -3.7658e-02, -3.7658e-02,
        -1.0000e+09, -3.7658e-02, -3.7658e-02, -1.0000e+09, -3.7658e-02,
        -3.7658e-02, -1.0000e+09, -3.7658e-02, -3.7658e-02, -1.0000e+09,
        -3.7658e-02, -3.7658e-02, -1.0000e+09, -3.7658e-02, -3.7658e-02,
        -1.0000e+09, -3.7658e-02

Tokens embeddings:
tensor([-1.0346e-01, -4.2066e-01, -1.0000e+09, -1.6257e-01, -2.2173e-01,
        -1.0000e+09, -7.9537e-02, -2.9843e-01, -1.0000e+09, -1.2520e-01,
        -4.0874e-01, -1.0000e+09,  1.4015e-03, -1.6246e-01, -4.9402e-01,
        -2.1200e-01, -3.4513e-01, -1.0000e+09, -7.9537e-02, -4.3756e-01,
        -1.0000e+09, -8.0235e-02, -3.4693e-01, -1.0000e+09, -1.7709e-01,
        -4.3756e-01, -1.0000e+09, -1.1020e-01, -2.7950e-01, -1.0000e+09,
        -5.1681e-02, -3.7728e-01, -1.0000e+09, -4.0138e-02, -3.7658e-02,
        -3.7658e-02, -3.7658e-02, -3.7658e-02, -3.7658e-02, -3.7658e-02,
        -3.7658e-02, -1.0000e+09, -3.7658e-02, -3.7658e-02, -1.0000e+09,
        -3.7658e-02, -3.7658e-02, -1.0000e+09, -3.7658e-02, -3.7658e-02,
        -1.0000e+09, -3.7658e-02, -3.7658e-02, -1.0000e+09, -3.7658e-02,
        -3.7658e-02, -1.0000e+09, -3.7658e-02, -3.7658e-02, -1.0000e+09,
        -3.7658e-02, -3.7658e-02, -1.0000e+09, -3.7658e-02, -3.7658e-02,
        -1.0000e+09, -3.7658e-02

Tokens embeddings:
tensor([-1.0346e-01, -4.2066e-01, -1.0000e+09, -1.6257e-01, -2.2173e-01,
        -1.0000e+09, -7.9537e-02, -2.9843e-01, -1.0000e+09, -1.2520e-01,
        -4.0874e-01, -1.0000e+09,  1.4015e-03, -1.6246e-01, -4.9402e-01,
        -2.1200e-01, -3.4513e-01, -1.0000e+09, -7.9537e-02, -4.3756e-01,
        -1.0000e+09, -8.0235e-02, -3.4693e-01, -1.0000e+09, -1.7709e-01,
        -4.3756e-01, -1.0000e+09, -1.1020e-01, -2.7950e-01, -1.0000e+09,
        -5.1681e-02, -3.7728e-01, -1.0000e+09, -4.0138e-02, -3.7658e-02,
        -3.7658e-02, -3.7658e-02, -3.7658e-02, -3.7658e-02, -3.7658e-02,
        -3.7658e-02, -1.0000e+09, -3.7658e-02, -3.7658e-02, -1.0000e+09,
        -3.7658e-02, -3.7658e-02, -1.0000e+09, -3.7658e-02, -3.7658e-02,
        -1.0000e+09, -3.7658e-02, -3.7658e-02, -1.0000e+09, -3.7658e-02,
        -3.7658e-02, -1.0000e+09, -3.7658e-02, -3.7658e-02, -1.0000e+09,
        -3.7658e-02, -3.7658e-02, -1.0000e+09, -3.7658e-02, -3.7658e-02,
        -1.0000e+09, -3.7658e-02

Tokens embeddings:
tensor([-1.0346e-01, -4.2066e-01, -1.0000e+09, -1.6257e-01, -2.2173e-01,
        -1.0000e+09, -7.9537e-02, -2.9843e-01, -1.0000e+09, -1.2520e-01,
        -4.0874e-01, -1.0000e+09,  1.4015e-03, -1.6246e-01, -4.9402e-01,
        -2.1200e-01, -3.4513e-01, -1.0000e+09, -7.9537e-02, -4.3756e-01,
        -1.0000e+09, -8.0235e-02, -3.4693e-01, -1.0000e+09, -1.7709e-01,
        -4.3756e-01, -1.0000e+09, -1.1020e-01, -2.7950e-01, -1.0000e+09,
        -5.1681e-02, -3.7728e-01, -1.0000e+09, -4.0138e-02, -3.7658e-02,
        -3.7658e-02, -3.7658e-02, -3.7658e-02, -3.7658e-02, -3.7658e-02,
        -3.7658e-02, -1.0000e+09, -3.7658e-02, -3.7658e-02, -1.0000e+09,
        -3.7658e-02, -3.7658e-02, -1.0000e+09, -3.7658e-02, -3.7658e-02,
        -1.0000e+09, -3.7658e-02, -3.7658e-02, -1.0000e+09, -3.7658e-02,
        -3.7658e-02, -1.0000e+09, -3.7658e-02, -3.7658e-02, -1.0000e+09,
        -3.7658e-02, -3.7658e-02, -1.0000e+09, -3.7658e-02, -3.7658e-02,
        -1.0000e+09, -3.7658e-02

Tokens embeddings:
tensor([-1.0346e-01, -4.2066e-01, -1.0000e+09, -1.6257e-01, -2.2173e-01,
        -1.0000e+09, -7.9537e-02, -2.9843e-01, -1.0000e+09, -1.2520e-01,
        -4.0874e-01, -1.0000e+09,  1.4015e-03, -1.6246e-01, -4.9402e-01,
        -2.1200e-01, -3.4513e-01, -1.0000e+09, -7.9537e-02, -4.3756e-01,
        -1.0000e+09, -8.0235e-02, -3.4693e-01, -1.0000e+09, -1.7709e-01,
        -4.3756e-01, -1.0000e+09, -1.1020e-01, -2.7950e-01, -1.0000e+09,
        -5.1681e-02, -3.7728e-01, -1.0000e+09, -4.0138e-02, -3.7658e-02,
        -3.7658e-02, -3.7658e-02, -3.7658e-02, -3.7658e-02, -3.7658e-02,
        -3.7658e-02, -1.0000e+09, -3.7658e-02, -3.7658e-02, -1.0000e+09,
        -3.7658e-02, -3.7658e-02, -1.0000e+09, -3.7658e-02, -3.7658e-02,
        -1.0000e+09, -3.7658e-02, -3.7658e-02, -1.0000e+09, -3.7658e-02,
        -3.7658e-02, -1.0000e+09, -3.7658e-02, -3.7658e-02, -1.0000e+09,
        -3.7658e-02, -3.7658e-02, -1.0000e+09, -3.7658e-02, -3.7658e-02,
        -1.0000e+09, -3.7658e-02

Tokens embeddings:
tensor([-1.0346e-01, -4.2066e-01, -1.0000e+09, -1.6257e-01, -2.2173e-01,
        -1.0000e+09, -7.9537e-02, -2.9843e-01, -1.0000e+09, -1.2520e-01,
        -4.0874e-01, -1.0000e+09,  1.4015e-03, -1.6246e-01, -4.9402e-01,
        -2.1200e-01, -3.4513e-01, -1.0000e+09, -7.9537e-02, -4.3756e-01,
        -1.0000e+09, -8.0235e-02, -3.4693e-01, -1.0000e+09, -1.7709e-01,
        -4.3756e-01, -1.0000e+09, -1.1020e-01, -2.7950e-01, -1.0000e+09,
        -5.1681e-02, -3.7728e-01, -1.0000e+09, -4.0138e-02, -3.7658e-02,
        -3.7658e-02, -3.7658e-02, -3.7658e-02, -3.7658e-02, -3.7658e-02,
        -3.7658e-02, -1.0000e+09, -3.7658e-02, -3.7658e-02, -1.0000e+09,
        -3.7658e-02, -3.7658e-02, -1.0000e+09, -3.7658e-02, -3.7658e-02,
        -1.0000e+09, -3.7658e-02, -3.7658e-02, -1.0000e+09, -3.7658e-02,
        -3.7658e-02, -1.0000e+09, -3.7658e-02, -3.7658e-02, -1.0000e+09,
        -3.7658e-02, -3.7658e-02, -1.0000e+09, -3.7658e-02, -3.7658e-02,
        -1.0000e+09, -3.7658e-02

Tokens embeddings:
tensor([-1.0346e-01, -4.2066e-01, -1.0000e+09, -1.6257e-01, -2.2173e-01,
        -1.0000e+09, -7.9537e-02, -2.9843e-01, -1.0000e+09, -1.2520e-01,
        -4.0874e-01, -1.0000e+09,  1.4015e-03, -1.6246e-01, -4.9402e-01,
        -2.1200e-01, -3.4513e-01, -1.0000e+09, -7.9537e-02, -4.3756e-01,
        -1.0000e+09, -8.0235e-02, -3.4693e-01, -1.0000e+09, -1.7709e-01,
        -4.3756e-01, -1.0000e+09, -1.1020e-01, -2.7950e-01, -1.0000e+09,
        -5.1681e-02, -3.7728e-01, -1.0000e+09, -4.0138e-02, -3.7658e-02,
        -3.7658e-02, -3.7658e-02, -3.7658e-02, -3.7658e-02, -3.7658e-02,
        -3.7658e-02, -1.0000e+09, -3.7658e-02, -3.7658e-02, -1.0000e+09,
        -3.7658e-02, -3.7658e-02, -1.0000e+09, -3.7658e-02, -3.7658e-02,
        -1.0000e+09, -3.7658e-02, -3.7658e-02, -1.0000e+09, -3.7658e-02,
        -3.7658e-02, -1.0000e+09, -3.7658e-02, -3.7658e-02, -1.0000e+09,
        -3.7658e-02, -3.7658e-02, -1.0000e+09, -3.7658e-02, -3.7658e-02,
        -1.0000e+09, -3.7658e-02

Tokens embeddings:
tensor([-0.1035, -0.1484, -0.1347, -0.0896, -0.0218, -0.1242, -0.1524, -0.0303,
        -0.0218, -0.1715, -0.1310, -0.0555, -0.0218, -0.1139, -0.1313, -0.1102,
         0.0171, -0.0521, -0.0896, -0.0856, -0.0856, -0.0998, -0.0586, -0.0653,
        -0.1299, -0.1308, -0.1759, -0.1289, -0.1941, -0.0954, -0.2127, -0.0945,
        -0.1525,  0.1856, -0.1759, -0.1759, -0.0401, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377])
arg max of tensor([-0.1035, -0.1484, -0.1347, -0.0896, -0.0218, -0.1242, -0.1524, -0.0303,
        -0.0218, -0.1715, -0.1310, -0.0555, -0.0218, -0.1139, -0.1313, -

Tokens embeddings:
tensor([-0.1035, -0.1484, -0.1347, -0.0896, -0.0218, -0.1242, -0.1524, -0.0303,
        -0.0218, -0.1715, -0.1310, -0.0555, -0.0218, -0.1139, -0.1313, -0.1102,
         0.0171, -0.0521, -0.0896, -0.0856, -0.0856, -0.0998, -0.0586, -0.0653,
        -0.1299, -0.1308, -0.1759, -0.1289, -0.1941, -0.0954, -0.2127, -0.0945,
        -0.1525,  0.1856, -0.1759, -0.1759, -0.0401, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377])
arg max of tensor([-0.1035, -0.1484, -0.1347, -0.0896, -0.0218, -0.1242, -0.1524, -0.0303,
        -0.0218, -0.1715, -0.1310, -0.0555, -0.0218, -0.1139, -0.1313, -

Tokens embeddings:
tensor([-0.1035, -0.1484, -0.1347, -0.0896, -0.0218, -0.1242, -0.1524, -0.0303,
        -0.0218, -0.1715, -0.1310, -0.0555, -0.0218, -0.1139, -0.1313, -0.1102,
         0.0171, -0.0521, -0.0896, -0.0856, -0.0856, -0.0998, -0.0586, -0.0653,
        -0.1299, -0.1308, -0.1759, -0.1289, -0.1941, -0.0954, -0.2127, -0.0945,
        -0.1525,  0.1856, -0.1759, -0.1759, -0.0401, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377])
arg max of tensor([-0.1035, -0.1484, -0.1347, -0.0896, -0.0218, -0.1242, -0.1524, -0.0303,
        -0.0218, -0.1715, -0.1310, -0.0555, -0.0218, -0.1139, -0.1313, -

Tokens embeddings:
tensor([-0.1035, -0.1484, -0.1347, -0.0896, -0.0218, -0.1242, -0.1524, -0.0303,
        -0.0218, -0.1715, -0.1310, -0.0555, -0.0218, -0.1139, -0.1313, -0.1102,
         0.0171, -0.0521, -0.0896, -0.0856, -0.0856, -0.0998, -0.0586, -0.0653,
        -0.1299, -0.1308, -0.1759, -0.1289, -0.1941, -0.0954, -0.2127, -0.0945,
        -0.1525,  0.1856, -0.1759, -0.1759, -0.0401, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377])
arg max of tensor([-0.1035, -0.1484, -0.1347, -0.0896, -0.0218, -0.1242, -0.1524, -0.0303,
        -0.0218, -0.1715, -0.1310, -0.0555, -0.0218, -0.1139, -0.1313, -

Tokens embeddings:
tensor([-0.1035, -0.1484, -0.1347, -0.0896, -0.0218, -0.1242, -0.1524, -0.0303,
        -0.0218, -0.1715, -0.1310, -0.0555, -0.0218, -0.1139, -0.1313, -0.1102,
         0.0171, -0.0521, -0.0896, -0.0856, -0.0856, -0.0998, -0.0586, -0.0653,
        -0.1299, -0.1308, -0.1759, -0.1289, -0.1941, -0.0954, -0.2127, -0.0945,
        -0.1525,  0.1856, -0.1759, -0.1759, -0.0401, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377])
arg max of tensor([-0.1035, -0.1484, -0.1347, -0.0896, -0.0218, -0.1242, -0.1524, -0.0303,
        -0.0218, -0.1715, -0.1310, -0.0555, -0.0218, -0.1139, -0.1313, -

Tokens embeddings:
tensor([-0.1035, -0.1484, -0.1347, -0.0896, -0.0218, -0.1242, -0.1524, -0.0303,
        -0.0218, -0.1715, -0.1310, -0.0555, -0.0218, -0.1139, -0.1313, -0.1102,
         0.0171, -0.0521, -0.0896, -0.0856, -0.0856, -0.0998, -0.0586, -0.0653,
        -0.1299, -0.1308, -0.1759, -0.1289, -0.1941, -0.0954, -0.2127, -0.0945,
        -0.1525,  0.1856, -0.1759, -0.1759, -0.0401, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377])
arg max of tensor([-0.1035, -0.1484, -0.1347, -0.0896, -0.0218, -0.1242, -0.1524, -0.0303,
        -0.0218, -0.1715, -0.1310, -0.0555, -0.0218, -0.1139, -0.1313, -

Tokens embeddings:
tensor([-0.1035, -0.1484, -0.1347, -0.0896, -0.0218, -0.1242, -0.1524, -0.0303,
        -0.0218, -0.1715, -0.1310, -0.0555, -0.0218, -0.1139, -0.1313, -0.1102,
         0.0171, -0.0521, -0.0896, -0.0856, -0.0856, -0.0998, -0.0586, -0.0653,
        -0.1299, -0.1308, -0.1759, -0.1289, -0.1941, -0.0954, -0.2127, -0.0945,
        -0.1525,  0.1856, -0.1759, -0.1759, -0.0401, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377])
arg max of tensor([-0.1035, -0.1484, -0.1347, -0.0896, -0.0218, -0.1242, -0.1524, -0.0303,
        -0.0218, -0.1715, -0.1310, -0.0555, -0.0218, -0.1139, -0.1313, -

Tokens embeddings:
tensor([-0.1035, -0.1484, -0.1347, -0.0896, -0.0218, -0.1242, -0.1524, -0.0303,
        -0.0218, -0.1715, -0.1310, -0.0555, -0.0218, -0.1139, -0.1313, -0.1102,
         0.0171, -0.0521, -0.0896, -0.0856, -0.0856, -0.0998, -0.0586, -0.0653,
        -0.1299, -0.1308, -0.1759, -0.1289, -0.1941, -0.0954, -0.2127, -0.0945,
        -0.1525,  0.1856, -0.1759, -0.1759, -0.0401, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377])
arg max of tensor([-0.1035, -0.1484, -0.1347, -0.0896, -0.0218, -0.1242, -0.1524, -0.0303,
        -0.0218, -0.1715, -0.1310, -0.0555, -0.0218, -0.1139, -0.1313, -

Tokens embeddings:
tensor([-0.1035, -0.1484, -0.1347, -0.0896, -0.0218, -0.1242, -0.1524, -0.0303,
        -0.0218, -0.1715, -0.1310, -0.0555, -0.0218, -0.1139, -0.1313, -0.1102,
         0.0171, -0.0521, -0.0896, -0.0856, -0.0856, -0.0998, -0.0586, -0.0653,
        -0.1299, -0.1308, -0.1759, -0.1289, -0.1941, -0.0954, -0.2127, -0.0945,
        -0.1525,  0.1856, -0.1759, -0.1759, -0.0401, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377])
arg max of tensor([-0.1035, -0.1484, -0.1347, -0.0896, -0.0218, -0.1242, -0.1524, -0.0303,
        -0.0218, -0.1715, -0.1310, -0.0555, -0.0218, -0.1139, -0.1313, -

Tokens embeddings:
tensor([-0.1035, -0.1484, -0.1347, -0.0896, -0.0218, -0.1242, -0.1524, -0.0303,
        -0.0218, -0.1715, -0.1310, -0.0555, -0.0218, -0.1139, -0.1313, -0.1102,
         0.0171, -0.0521, -0.0896, -0.0856, -0.0856, -0.0998, -0.0586, -0.0653,
        -0.1299, -0.1308, -0.1759, -0.1289, -0.1941, -0.0954, -0.2127, -0.0945,
        -0.1525,  0.1856, -0.1759, -0.1759, -0.0401, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377])
arg max of tensor([-0.1035, -0.1484, -0.1347, -0.0896, -0.0218, -0.1242, -0.1524, -0.0303,
        -0.0218, -0.1715, -0.1310, -0.0555, -0.0218, -0.1139, -0.1313, -

Tokens embeddings:
tensor([-0.1035, -0.1484, -0.1347, -0.0896, -0.0218, -0.1242, -0.1524, -0.0303,
        -0.0218, -0.1715, -0.1310, -0.0555, -0.0218, -0.1139, -0.1313, -0.1102,
         0.0171, -0.0521, -0.0896, -0.0856, -0.0856, -0.0998, -0.0586, -0.0653,
        -0.1299, -0.1308, -0.1759, -0.1289, -0.1941, -0.0954, -0.2127, -0.0945,
        -0.1525,  0.1856, -0.1759, -0.1759, -0.0401, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377])
arg max of tensor([-0.1035, -0.1484, -0.1347, -0.0896, -0.0218, -0.1242, -0.1524, -0.0303,
        -0.0218, -0.1715, -0.1310, -0.0555, -0.0218, -0.1139, -0.1313, -

Tokens embeddings:
tensor([-0.1035, -0.1484, -0.1347, -0.0896, -0.0218, -0.1242, -0.1524, -0.0303,
        -0.0218, -0.1715, -0.1310, -0.0555, -0.0218, -0.1139, -0.1313, -0.1102,
         0.0171, -0.0521, -0.0896, -0.0856, -0.0856, -0.0998, -0.0586, -0.0653,
        -0.1299, -0.1308, -0.1759, -0.1289, -0.1941, -0.0954, -0.2127, -0.0945,
        -0.1525,  0.1856, -0.1759, -0.1759, -0.0401, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377])
arg max of tensor([-0.1035, -0.1484, -0.1347, -0.0896, -0.0218, -0.1242, -0.1524, -0.0303,
        -0.0218, -0.1715, -0.1310, -0.0555, -0.0218, -0.1139, -0.1313, -

Tokens embeddings:
tensor([-0.1035, -0.0582, -0.0379, -0.0990, -0.1582, -0.1096, -0.0534, -0.0906,
        -0.0592, -0.1440, -0.0040, -0.1069, -0.0448, -0.1859, -0.0786, -0.1706,
        -0.0896, -0.1524, -0.0996, -0.1194, -0.1395, -0.1261, -0.1771, -0.1194,
        -0.1259,  0.1856, -0.0401, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377])
arg max of tensor([-0.1035, -0.0582, -0.0379, -0.0990, -0.1582, -0.1096, -0.0534, -0.0906,
        -0.0592, -0.1440, -0.0040, -0.1069, -0.0448, -0.1859, -0.0786, -

Tokens embeddings:
tensor([-0.1035, -0.0582, -0.0379, -0.0990, -0.1582, -0.1096, -0.0534, -0.0906,
        -0.0592, -0.1440, -0.0040, -0.1069, -0.0448, -0.1859, -0.0786, -0.1706,
        -0.0896, -0.1524, -0.0996, -0.1194, -0.1395, -0.1261, -0.1771, -0.1194,
        -0.1259,  0.1856, -0.0401, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377])
arg max of tensor([-0.1035, -0.0582, -0.0379, -0.0990, -0.1582, -0.1096, -0.0534, -0.0906,
        -0.0592, -0.1440, -0.0040, -0.1069, -0.0448, -0.1859, -0.0786, -

Tokens embeddings:
tensor([-0.1035, -0.0582, -0.0379, -0.0990, -0.1582, -0.1096, -0.0534, -0.0906,
        -0.0592, -0.1440, -0.0040, -0.1069, -0.0448, -0.1859, -0.0786, -0.1706,
        -0.0896, -0.1524, -0.0996, -0.1194, -0.1395, -0.1261, -0.1771, -0.1194,
        -0.1259,  0.1856, -0.0401, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377])
arg max of tensor([-0.1035, -0.0582, -0.0379, -0.0990, -0.1582, -0.1096, -0.0534, -0.0906,
        -0.0592, -0.1440, -0.0040, -0.1069, -0.0448, -0.1859, -0.0786, -

Tokens embeddings:
tensor([-0.1035, -0.0582, -0.0379, -0.0990, -0.1582, -0.1096, -0.0534, -0.0906,
        -0.0592, -0.1440, -0.0040, -0.1069, -0.0448, -0.1859, -0.0786, -0.1706,
        -0.0896, -0.1524, -0.0996, -0.1194, -0.1395, -0.1261, -0.1771, -0.1194,
        -0.1259,  0.1856, -0.0401, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377])
arg max of tensor([-0.1035, -0.0582, -0.0379, -0.0990, -0.1582, -0.1096, -0.0534, -0.0906,
        -0.0592, -0.1440, -0.0040, -0.1069, -0.0448, -0.1859, -0.0786, -

Tokens embeddings:
tensor([-0.1035, -0.0582, -0.0379, -0.0990, -0.1582, -0.1096, -0.0534, -0.0906,
        -0.0592, -0.1440, -0.0040, -0.1069, -0.0448, -0.1859, -0.0786, -0.1706,
        -0.0896, -0.1524, -0.0996, -0.1194, -0.1395, -0.1261, -0.1771, -0.1194,
        -0.1259,  0.1856, -0.0401, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377])
arg max of tensor([-0.1035, -0.0582, -0.0379, -0.0990, -0.1582, -0.1096, -0.0534, -0.0906,
        -0.0592, -0.1440, -0.0040, -0.1069, -0.0448, -0.1859, -0.0786, -

Tokens embeddings:
tensor([-0.1035, -0.0582, -0.0379, -0.0990, -0.1582, -0.1096, -0.0534, -0.0906,
        -0.0592, -0.1440, -0.0040, -0.1069, -0.0448, -0.1859, -0.0786, -0.1706,
        -0.0896, -0.1524, -0.0996, -0.1194, -0.1395, -0.1261, -0.1771, -0.1194,
        -0.1259,  0.1856, -0.0401, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377])
arg max of tensor([-0.1035, -0.0582, -0.0379, -0.0990, -0.1582, -0.1096, -0.0534, -0.0906,
        -0.0592, -0.1440, -0.0040, -0.1069, -0.0448, -0.1859, -0.0786, -

Tokens embeddings:
tensor([-0.1035, -0.0582, -0.0379, -0.0990, -0.1582, -0.1096, -0.0534, -0.0906,
        -0.0592, -0.1440, -0.0040, -0.1069, -0.0448, -0.1859, -0.0786, -0.1706,
        -0.0896, -0.1524, -0.0996, -0.1194, -0.1395, -0.1261, -0.1771, -0.1194,
        -0.1259,  0.1856, -0.0401, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377])
arg max of tensor([-0.1035, -0.0582, -0.0379, -0.0990, -0.1582, -0.1096, -0.0534, -0.0906,
        -0.0592, -0.1440, -0.0040, -0.1069, -0.0448, -0.1859, -0.0786, -

Tokens embeddings:
tensor([-0.1035, -0.0582, -0.0379, -0.0990, -0.1582, -0.1096, -0.0534, -0.0906,
        -0.0592, -0.1440, -0.0040, -0.1069, -0.0448, -0.1859, -0.0786, -0.1706,
        -0.0896, -0.1524, -0.0996, -0.1194, -0.1395, -0.1261, -0.1771, -0.1194,
        -0.1259,  0.1856, -0.0401, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377])
arg max of tensor([-0.1035, -0.0582, -0.0379, -0.0990, -0.1582, -0.1096, -0.0534, -0.0906,
        -0.0592, -0.1440, -0.0040, -0.1069, -0.0448, -0.1859, -0.0786, -

Tokens embeddings:
tensor([-0.1035, -0.0582, -0.0379, -0.0990, -0.1582, -0.1096, -0.0534, -0.0906,
        -0.0592, -0.1440, -0.0040, -0.1069, -0.0448, -0.1859, -0.0786, -0.1706,
        -0.0896, -0.1524, -0.0996, -0.1194, -0.1395, -0.1261, -0.1771, -0.1194,
        -0.1259,  0.1856, -0.0401, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377])
arg max of tensor([-0.1035, -0.0582, -0.0379, -0.0990, -0.1582, -0.1096, -0.0534, -0.0906,
        -0.0592, -0.1440, -0.0040, -0.1069, -0.0448, -0.1859, -0.0786, -

Tokens embeddings:
tensor([-0.1035, -0.0582, -0.0379, -0.0990, -0.1582, -0.1096, -0.0534, -0.0906,
        -0.0592, -0.1440, -0.0040, -0.1069, -0.0448, -0.1859, -0.0786, -0.1706,
        -0.0896, -0.1524, -0.0996, -0.1194, -0.1395, -0.1261, -0.1771, -0.1194,
        -0.1259,  0.1856, -0.0401, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377])
arg max of tensor([-0.1035, -0.0582, -0.0379, -0.0990, -0.1582, -0.1096, -0.0534, -0.0906,
        -0.0592, -0.1440, -0.0040, -0.1069, -0.0448, -0.1859, -0.0786, -

Tokens embeddings:
tensor([-0.1035, -0.0582, -0.0379, -0.0990, -0.1582, -0.1096, -0.0534, -0.0906,
        -0.0592, -0.1440, -0.0040, -0.1069, -0.0448, -0.1859, -0.0786, -0.1706,
        -0.0896, -0.1524, -0.0996, -0.1194, -0.1395, -0.1261, -0.1771, -0.1194,
        -0.1259,  0.1856, -0.0401, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377])
arg max of tensor([-0.1035, -0.0582, -0.0379, -0.0990, -0.1582, -0.1096, -0.0534, -0.0906,
        -0.0592, -0.1440, -0.0040, -0.1069, -0.0448, -0.1859, -0.0786, -

Tokens embeddings:
tensor([-0.1035, -0.0582, -0.0379, -0.0990, -0.1582, -0.1096, -0.0534, -0.0906,
        -0.0592, -0.1440, -0.0040, -0.1069, -0.0448, -0.1859, -0.0786, -0.1706,
        -0.0896, -0.1524, -0.0996, -0.1194, -0.1395, -0.1261, -0.1771, -0.1194,
        -0.1259,  0.1856, -0.0401, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377])
arg max of tensor([-0.1035, -0.0582, -0.0379, -0.0990, -0.1582, -0.1096, -0.0534, -0.0906,
        -0.0592, -0.1440, -0.0040, -0.1069, -0.0448, -0.1859, -0.0786, -

In [32]:
# Pooling First token [CLS] for each sentence - argmax

# Select some sentences randomly
sent_index = [11, 12, 13, 14, 15, 16, 17, 18, 19, 20]

for s in sent_index:
  print("*" * 100)
  # Get the sentence's words
  tokens = test_inputs[s]
  # For each layer...
  for l in range(12):
    print("\nLayer", l+1)
    attention = np.squeeze(test_attentions[s][l].detach().cpu().numpy(), axis=0)
    # and for each head
    #for h, head in enumerate(attention):
    #print("Head", h+1)
    # Get the attention for the cls token
    encoded_tokens = bert_tokenizer(tokens, padding=True, truncation=True, max_length=128, return_tensors='pt')
    encoded_tokens = encoded_tokens.to(device)
    with torch.no_grad():
      model_output1 = model_e(**encoded_tokens)
      tokens_embeddings = model_output1[0][:,0]
      tokens_embeddings = tokens_embeddings.cpu()
      cls_attentions = head[0]
      display(HTML(colorize(tokens, cls_attentions)))
      print("Tokens embeddings:")
      print(tokens_embeddings)
      arg = argmax(tokens_embeddings)
      print('arg max of %s: %d' % (tokens_embeddings, arg))

****************************************************************************************************

Layer 1


Tokens embeddings:
tensor([-0.1035, -0.0592, -0.0592, -0.4183, -0.1267, -0.2803, -0.1741, -0.2192,
        -0.3302, -0.3115, -0.2911, -0.1685, -0.3226,  0.1205, -0.2808, -0.3196,
        -0.2364,  0.0208, -0.3115, -0.3707, -0.3398, -0.3335, -0.3857, -0.3761,
        -0.4147, -0.0896, -0.0683, -0.3533,  0.1856, -0.2364, -0.1595, -0.2315,
        -0.2269, -0.2213, -0.3533,  0.1856, -0.3253, -0.2583, -0.2872, -0.0435,
        -0.1741, -0.5278, -0.4183, -0.1952, -0.3574, -0.1803,  0.0432, -0.3196,
        -0.1741, -0.2319,  0.1856, -0.0401, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377])
arg max of tensor([-0.1035, -0.0592, -0.0592, -0.4183, -0.1267, -0.2803, -0.1741, -0.2192,
        -0.3302, -0.3115, -0.2911, -0.1685, -0.3226,  0.1205, -0.2808, -

Tokens embeddings:
tensor([-0.1035, -0.0592, -0.0592, -0.4183, -0.1267, -0.2803, -0.1741, -0.2192,
        -0.3302, -0.3115, -0.2911, -0.1685, -0.3226,  0.1205, -0.2808, -0.3196,
        -0.2364,  0.0208, -0.3115, -0.3707, -0.3398, -0.3335, -0.3857, -0.3761,
        -0.4147, -0.0896, -0.0683, -0.3533,  0.1856, -0.2364, -0.1595, -0.2315,
        -0.2269, -0.2213, -0.3533,  0.1856, -0.3253, -0.2583, -0.2872, -0.0435,
        -0.1741, -0.5278, -0.4183, -0.1952, -0.3574, -0.1803,  0.0432, -0.3196,
        -0.1741, -0.2319,  0.1856, -0.0401, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377])
arg max of tensor([-0.1035, -0.0592, -0.0592, -0.4183, -0.1267, -0.2803, -0.1741, -0.2192,
        -0.3302, -0.3115, -0.2911, -0.1685, -0.3226,  0.1205, -0.2808, -

Tokens embeddings:
tensor([-0.1035, -0.0592, -0.0592, -0.4183, -0.1267, -0.2803, -0.1741, -0.2192,
        -0.3302, -0.3115, -0.2911, -0.1685, -0.3226,  0.1205, -0.2808, -0.3196,
        -0.2364,  0.0208, -0.3115, -0.3707, -0.3398, -0.3335, -0.3857, -0.3761,
        -0.4147, -0.0896, -0.0683, -0.3533,  0.1856, -0.2364, -0.1595, -0.2315,
        -0.2269, -0.2213, -0.3533,  0.1856, -0.3253, -0.2583, -0.2872, -0.0435,
        -0.1741, -0.5278, -0.4183, -0.1952, -0.3574, -0.1803,  0.0432, -0.3196,
        -0.1741, -0.2319,  0.1856, -0.0401, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377])
arg max of tensor([-0.1035, -0.0592, -0.0592, -0.4183, -0.1267, -0.2803, -0.1741, -0.2192,
        -0.3302, -0.3115, -0.2911, -0.1685, -0.3226,  0.1205, -0.2808, -

Tokens embeddings:
tensor([-0.1035, -0.0592, -0.0592, -0.4183, -0.1267, -0.2803, -0.1741, -0.2192,
        -0.3302, -0.3115, -0.2911, -0.1685, -0.3226,  0.1205, -0.2808, -0.3196,
        -0.2364,  0.0208, -0.3115, -0.3707, -0.3398, -0.3335, -0.3857, -0.3761,
        -0.4147, -0.0896, -0.0683, -0.3533,  0.1856, -0.2364, -0.1595, -0.2315,
        -0.2269, -0.2213, -0.3533,  0.1856, -0.3253, -0.2583, -0.2872, -0.0435,
        -0.1741, -0.5278, -0.4183, -0.1952, -0.3574, -0.1803,  0.0432, -0.3196,
        -0.1741, -0.2319,  0.1856, -0.0401, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377])
arg max of tensor([-0.1035, -0.0592, -0.0592, -0.4183, -0.1267, -0.2803, -0.1741, -0.2192,
        -0.3302, -0.3115, -0.2911, -0.1685, -0.3226,  0.1205, -0.2808, -

Tokens embeddings:
tensor([-0.1035, -0.0592, -0.0592, -0.4183, -0.1267, -0.2803, -0.1741, -0.2192,
        -0.3302, -0.3115, -0.2911, -0.1685, -0.3226,  0.1205, -0.2808, -0.3196,
        -0.2364,  0.0208, -0.3115, -0.3707, -0.3398, -0.3335, -0.3857, -0.3761,
        -0.4147, -0.0896, -0.0683, -0.3533,  0.1856, -0.2364, -0.1595, -0.2315,
        -0.2269, -0.2213, -0.3533,  0.1856, -0.3253, -0.2583, -0.2872, -0.0435,
        -0.1741, -0.5278, -0.4183, -0.1952, -0.3574, -0.1803,  0.0432, -0.3196,
        -0.1741, -0.2319,  0.1856, -0.0401, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377])
arg max of tensor([-0.1035, -0.0592, -0.0592, -0.4183, -0.1267, -0.2803, -0.1741, -0.2192,
        -0.3302, -0.3115, -0.2911, -0.1685, -0.3226,  0.1205, -0.2808, -

Tokens embeddings:
tensor([-0.1035, -0.0592, -0.0592, -0.4183, -0.1267, -0.2803, -0.1741, -0.2192,
        -0.3302, -0.3115, -0.2911, -0.1685, -0.3226,  0.1205, -0.2808, -0.3196,
        -0.2364,  0.0208, -0.3115, -0.3707, -0.3398, -0.3335, -0.3857, -0.3761,
        -0.4147, -0.0896, -0.0683, -0.3533,  0.1856, -0.2364, -0.1595, -0.2315,
        -0.2269, -0.2213, -0.3533,  0.1856, -0.3253, -0.2583, -0.2872, -0.0435,
        -0.1741, -0.5278, -0.4183, -0.1952, -0.3574, -0.1803,  0.0432, -0.3196,
        -0.1741, -0.2319,  0.1856, -0.0401, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377])
arg max of tensor([-0.1035, -0.0592, -0.0592, -0.4183, -0.1267, -0.2803, -0.1741, -0.2192,
        -0.3302, -0.3115, -0.2911, -0.1685, -0.3226,  0.1205, -0.2808, -

Tokens embeddings:
tensor([-0.1035, -0.0592, -0.0592, -0.4183, -0.1267, -0.2803, -0.1741, -0.2192,
        -0.3302, -0.3115, -0.2911, -0.1685, -0.3226,  0.1205, -0.2808, -0.3196,
        -0.2364,  0.0208, -0.3115, -0.3707, -0.3398, -0.3335, -0.3857, -0.3761,
        -0.4147, -0.0896, -0.0683, -0.3533,  0.1856, -0.2364, -0.1595, -0.2315,
        -0.2269, -0.2213, -0.3533,  0.1856, -0.3253, -0.2583, -0.2872, -0.0435,
        -0.1741, -0.5278, -0.4183, -0.1952, -0.3574, -0.1803,  0.0432, -0.3196,
        -0.1741, -0.2319,  0.1856, -0.0401, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377])
arg max of tensor([-0.1035, -0.0592, -0.0592, -0.4183, -0.1267, -0.2803, -0.1741, -0.2192,
        -0.3302, -0.3115, -0.2911, -0.1685, -0.3226,  0.1205, -0.2808, -

Tokens embeddings:
tensor([-0.1035, -0.0592, -0.0592, -0.4183, -0.1267, -0.2803, -0.1741, -0.2192,
        -0.3302, -0.3115, -0.2911, -0.1685, -0.3226,  0.1205, -0.2808, -0.3196,
        -0.2364,  0.0208, -0.3115, -0.3707, -0.3398, -0.3335, -0.3857, -0.3761,
        -0.4147, -0.0896, -0.0683, -0.3533,  0.1856, -0.2364, -0.1595, -0.2315,
        -0.2269, -0.2213, -0.3533,  0.1856, -0.3253, -0.2583, -0.2872, -0.0435,
        -0.1741, -0.5278, -0.4183, -0.1952, -0.3574, -0.1803,  0.0432, -0.3196,
        -0.1741, -0.2319,  0.1856, -0.0401, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377])
arg max of tensor([-0.1035, -0.0592, -0.0592, -0.4183, -0.1267, -0.2803, -0.1741, -0.2192,
        -0.3302, -0.3115, -0.2911, -0.1685, -0.3226,  0.1205, -0.2808, -

Tokens embeddings:
tensor([-0.1035, -0.0592, -0.0592, -0.4183, -0.1267, -0.2803, -0.1741, -0.2192,
        -0.3302, -0.3115, -0.2911, -0.1685, -0.3226,  0.1205, -0.2808, -0.3196,
        -0.2364,  0.0208, -0.3115, -0.3707, -0.3398, -0.3335, -0.3857, -0.3761,
        -0.4147, -0.0896, -0.0683, -0.3533,  0.1856, -0.2364, -0.1595, -0.2315,
        -0.2269, -0.2213, -0.3533,  0.1856, -0.3253, -0.2583, -0.2872, -0.0435,
        -0.1741, -0.5278, -0.4183, -0.1952, -0.3574, -0.1803,  0.0432, -0.3196,
        -0.1741, -0.2319,  0.1856, -0.0401, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377])
arg max of tensor([-0.1035, -0.0592, -0.0592, -0.4183, -0.1267, -0.2803, -0.1741, -0.2192,
        -0.3302, -0.3115, -0.2911, -0.1685, -0.3226,  0.1205, -0.2808, -

Tokens embeddings:
tensor([-0.1035, -0.0592, -0.0592, -0.4183, -0.1267, -0.2803, -0.1741, -0.2192,
        -0.3302, -0.3115, -0.2911, -0.1685, -0.3226,  0.1205, -0.2808, -0.3196,
        -0.2364,  0.0208, -0.3115, -0.3707, -0.3398, -0.3335, -0.3857, -0.3761,
        -0.4147, -0.0896, -0.0683, -0.3533,  0.1856, -0.2364, -0.1595, -0.2315,
        -0.2269, -0.2213, -0.3533,  0.1856, -0.3253, -0.2583, -0.2872, -0.0435,
        -0.1741, -0.5278, -0.4183, -0.1952, -0.3574, -0.1803,  0.0432, -0.3196,
        -0.1741, -0.2319,  0.1856, -0.0401, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377])
arg max of tensor([-0.1035, -0.0592, -0.0592, -0.4183, -0.1267, -0.2803, -0.1741, -0.2192,
        -0.3302, -0.3115, -0.2911, -0.1685, -0.3226,  0.1205, -0.2808, -

Tokens embeddings:
tensor([-0.1035, -0.0592, -0.0592, -0.4183, -0.1267, -0.2803, -0.1741, -0.2192,
        -0.3302, -0.3115, -0.2911, -0.1685, -0.3226,  0.1205, -0.2808, -0.3196,
        -0.2364,  0.0208, -0.3115, -0.3707, -0.3398, -0.3335, -0.3857, -0.3761,
        -0.4147, -0.0896, -0.0683, -0.3533,  0.1856, -0.2364, -0.1595, -0.2315,
        -0.2269, -0.2213, -0.3533,  0.1856, -0.3253, -0.2583, -0.2872, -0.0435,
        -0.1741, -0.5278, -0.4183, -0.1952, -0.3574, -0.1803,  0.0432, -0.3196,
        -0.1741, -0.2319,  0.1856, -0.0401, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377])
arg max of tensor([-0.1035, -0.0592, -0.0592, -0.4183, -0.1267, -0.2803, -0.1741, -0.2192,
        -0.3302, -0.3115, -0.2911, -0.1685, -0.3226,  0.1205, -0.2808, -

Tokens embeddings:
tensor([-0.1035, -0.0592, -0.0592, -0.4183, -0.1267, -0.2803, -0.1741, -0.2192,
        -0.3302, -0.3115, -0.2911, -0.1685, -0.3226,  0.1205, -0.2808, -0.3196,
        -0.2364,  0.0208, -0.3115, -0.3707, -0.3398, -0.3335, -0.3857, -0.3761,
        -0.4147, -0.0896, -0.0683, -0.3533,  0.1856, -0.2364, -0.1595, -0.2315,
        -0.2269, -0.2213, -0.3533,  0.1856, -0.3253, -0.2583, -0.2872, -0.0435,
        -0.1741, -0.5278, -0.4183, -0.1952, -0.3574, -0.1803,  0.0432, -0.3196,
        -0.1741, -0.2319,  0.1856, -0.0401, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377])
arg max of tensor([-0.1035, -0.0592, -0.0592, -0.4183, -0.1267, -0.2803, -0.1741, -0.2192,
        -0.3302, -0.3115, -0.2911, -0.1685, -0.3226,  0.1205, -0.2808, -

Tokens embeddings:
tensor([-0.1035, -0.4348, -0.3574, -0.4183, -0.4316,  0.0434, -0.1373, -0.2439,
        -0.3335, -0.4183, -0.2776, -0.3874, -0.4128, -0.4183, -0.1041, -0.0040,
        -0.4488, -0.3478, -0.3409, -0.2418, -0.3834, -0.3100, -0.2880, -0.1741,
        -0.5278, -0.3857, -0.3519, -0.3335, -0.4183, -0.2468,  0.1856, -0.0401,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377])
arg max of tensor([-0.1035, -0.4348, -0.3574, -0.4183, -0.4316,  0.0434, -0.1373, -0.2439,
        -0.3335, -0.4183, -0.2776, -0.3874, -0.4128, -0.4183, -0.1041, -

Tokens embeddings:
tensor([-0.1035, -0.4348, -0.3574, -0.4183, -0.4316,  0.0434, -0.1373, -0.2439,
        -0.3335, -0.4183, -0.2776, -0.3874, -0.4128, -0.4183, -0.1041, -0.0040,
        -0.4488, -0.3478, -0.3409, -0.2418, -0.3834, -0.3100, -0.2880, -0.1741,
        -0.5278, -0.3857, -0.3519, -0.3335, -0.4183, -0.2468,  0.1856, -0.0401,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377])
arg max of tensor([-0.1035, -0.4348, -0.3574, -0.4183, -0.4316,  0.0434, -0.1373, -0.2439,
        -0.3335, -0.4183, -0.2776, -0.3874, -0.4128, -0.4183, -0.1041, -

Tokens embeddings:
tensor([-0.1035, -0.4348, -0.3574, -0.4183, -0.4316,  0.0434, -0.1373, -0.2439,
        -0.3335, -0.4183, -0.2776, -0.3874, -0.4128, -0.4183, -0.1041, -0.0040,
        -0.4488, -0.3478, -0.3409, -0.2418, -0.3834, -0.3100, -0.2880, -0.1741,
        -0.5278, -0.3857, -0.3519, -0.3335, -0.4183, -0.2468,  0.1856, -0.0401,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377])
arg max of tensor([-0.1035, -0.4348, -0.3574, -0.4183, -0.4316,  0.0434, -0.1373, -0.2439,
        -0.3335, -0.4183, -0.2776, -0.3874, -0.4128, -0.4183, -0.1041, -

Tokens embeddings:
tensor([-0.1035, -0.4348, -0.3574, -0.4183, -0.4316,  0.0434, -0.1373, -0.2439,
        -0.3335, -0.4183, -0.2776, -0.3874, -0.4128, -0.4183, -0.1041, -0.0040,
        -0.4488, -0.3478, -0.3409, -0.2418, -0.3834, -0.3100, -0.2880, -0.1741,
        -0.5278, -0.3857, -0.3519, -0.3335, -0.4183, -0.2468,  0.1856, -0.0401,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377])
arg max of tensor([-0.1035, -0.4348, -0.3574, -0.4183, -0.4316,  0.0434, -0.1373, -0.2439,
        -0.3335, -0.4183, -0.2776, -0.3874, -0.4128, -0.4183, -0.1041, -

Tokens embeddings:
tensor([-0.1035, -0.4348, -0.3574, -0.4183, -0.4316,  0.0434, -0.1373, -0.2439,
        -0.3335, -0.4183, -0.2776, -0.3874, -0.4128, -0.4183, -0.1041, -0.0040,
        -0.4488, -0.3478, -0.3409, -0.2418, -0.3834, -0.3100, -0.2880, -0.1741,
        -0.5278, -0.3857, -0.3519, -0.3335, -0.4183, -0.2468,  0.1856, -0.0401,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377])
arg max of tensor([-0.1035, -0.4348, -0.3574, -0.4183, -0.4316,  0.0434, -0.1373, -0.2439,
        -0.3335, -0.4183, -0.2776, -0.3874, -0.4128, -0.4183, -0.1041, -

Tokens embeddings:
tensor([-0.1035, -0.4348, -0.3574, -0.4183, -0.4316,  0.0434, -0.1373, -0.2439,
        -0.3335, -0.4183, -0.2776, -0.3874, -0.4128, -0.4183, -0.1041, -0.0040,
        -0.4488, -0.3478, -0.3409, -0.2418, -0.3834, -0.3100, -0.2880, -0.1741,
        -0.5278, -0.3857, -0.3519, -0.3335, -0.4183, -0.2468,  0.1856, -0.0401,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377])
arg max of tensor([-0.1035, -0.4348, -0.3574, -0.4183, -0.4316,  0.0434, -0.1373, -0.2439,
        -0.3335, -0.4183, -0.2776, -0.3874, -0.4128, -0.4183, -0.1041, -

Tokens embeddings:
tensor([-0.1035, -0.4348, -0.3574, -0.4183, -0.4316,  0.0434, -0.1373, -0.2439,
        -0.3335, -0.4183, -0.2776, -0.3874, -0.4128, -0.4183, -0.1041, -0.0040,
        -0.4488, -0.3478, -0.3409, -0.2418, -0.3834, -0.3100, -0.2880, -0.1741,
        -0.5278, -0.3857, -0.3519, -0.3335, -0.4183, -0.2468,  0.1856, -0.0401,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377])
arg max of tensor([-0.1035, -0.4348, -0.3574, -0.4183, -0.4316,  0.0434, -0.1373, -0.2439,
        -0.3335, -0.4183, -0.2776, -0.3874, -0.4128, -0.4183, -0.1041, -

Tokens embeddings:
tensor([-0.1035, -0.4348, -0.3574, -0.4183, -0.4316,  0.0434, -0.1373, -0.2439,
        -0.3335, -0.4183, -0.2776, -0.3874, -0.4128, -0.4183, -0.1041, -0.0040,
        -0.4488, -0.3478, -0.3409, -0.2418, -0.3834, -0.3100, -0.2880, -0.1741,
        -0.5278, -0.3857, -0.3519, -0.3335, -0.4183, -0.2468,  0.1856, -0.0401,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377])
arg max of tensor([-0.1035, -0.4348, -0.3574, -0.4183, -0.4316,  0.0434, -0.1373, -0.2439,
        -0.3335, -0.4183, -0.2776, -0.3874, -0.4128, -0.4183, -0.1041, -

Tokens embeddings:
tensor([-0.1035, -0.4348, -0.3574, -0.4183, -0.4316,  0.0434, -0.1373, -0.2439,
        -0.3335, -0.4183, -0.2776, -0.3874, -0.4128, -0.4183, -0.1041, -0.0040,
        -0.4488, -0.3478, -0.3409, -0.2418, -0.3834, -0.3100, -0.2880, -0.1741,
        -0.5278, -0.3857, -0.3519, -0.3335, -0.4183, -0.2468,  0.1856, -0.0401,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377])
arg max of tensor([-0.1035, -0.4348, -0.3574, -0.4183, -0.4316,  0.0434, -0.1373, -0.2439,
        -0.3335, -0.4183, -0.2776, -0.3874, -0.4128, -0.4183, -0.1041, -

Tokens embeddings:
tensor([-0.1035, -0.4348, -0.3574, -0.4183, -0.4316,  0.0434, -0.1373, -0.2439,
        -0.3335, -0.4183, -0.2776, -0.3874, -0.4128, -0.4183, -0.1041, -0.0040,
        -0.4488, -0.3478, -0.3409, -0.2418, -0.3834, -0.3100, -0.2880, -0.1741,
        -0.5278, -0.3857, -0.3519, -0.3335, -0.4183, -0.2468,  0.1856, -0.0401,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377])
arg max of tensor([-0.1035, -0.4348, -0.3574, -0.4183, -0.4316,  0.0434, -0.1373, -0.2439,
        -0.3335, -0.4183, -0.2776, -0.3874, -0.4128, -0.4183, -0.1041, -

Tokens embeddings:
tensor([-0.1035, -0.4348, -0.3574, -0.4183, -0.4316,  0.0434, -0.1373, -0.2439,
        -0.3335, -0.4183, -0.2776, -0.3874, -0.4128, -0.4183, -0.1041, -0.0040,
        -0.4488, -0.3478, -0.3409, -0.2418, -0.3834, -0.3100, -0.2880, -0.1741,
        -0.5278, -0.3857, -0.3519, -0.3335, -0.4183, -0.2468,  0.1856, -0.0401,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377])
arg max of tensor([-0.1035, -0.4348, -0.3574, -0.4183, -0.4316,  0.0434, -0.1373, -0.2439,
        -0.3335, -0.4183, -0.2776, -0.3874, -0.4128, -0.4183, -0.1041, -

Tokens embeddings:
tensor([-0.1035, -0.4348, -0.3574, -0.4183, -0.4316,  0.0434, -0.1373, -0.2439,
        -0.3335, -0.4183, -0.2776, -0.3874, -0.4128, -0.4183, -0.1041, -0.0040,
        -0.4488, -0.3478, -0.3409, -0.2418, -0.3834, -0.3100, -0.2880, -0.1741,
        -0.5278, -0.3857, -0.3519, -0.3335, -0.4183, -0.2468,  0.1856, -0.0401,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377])
arg max of tensor([-0.1035, -0.4348, -0.3574, -0.4183, -0.4316,  0.0434, -0.1373, -0.2439,
        -0.3335, -0.4183, -0.2776, -0.3874, -0.4128, -0.4183, -0.1041, -

Tokens embeddings:
tensor([-0.1035, -0.3517, -0.2750,  0.1856, -0.2962, -0.3959, -0.0040, -0.3857,
        -0.0144, -0.2528, -0.3834, -0.3100, -0.2665, -0.3335, -0.4183, -0.1976,
        -0.0592, -0.2931, -0.2080, -0.0040, -0.4264, -0.4207,  0.1856, -0.3655,
        -0.0833, -0.0577, -0.1373, -0.4045, -0.4213, -0.3574, -0.3857, -0.4401,
        -0.3335, -0.3590, -0.2236, -0.4488, -0.3298, -0.3282, -0.1741, -0.3736,
        -0.3857, -0.4630, -0.3574, -0.4326, -0.4044, -0.3857, -0.3992, -0.0592,
        -0.3335, -0.0802,  0.1856, -0.0401, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377])
arg max of tensor([-0.1035, -0.3517, -0.2750,  0.1856, -0.2962, -0.3959, -0.0040, -0.3857,
        -0.0144, -0.2528, -0.3834, -0.3100, -0.2665, -0.3335, -0.4183, -

Tokens embeddings:
tensor([-0.1035, -0.3517, -0.2750,  0.1856, -0.2962, -0.3959, -0.0040, -0.3857,
        -0.0144, -0.2528, -0.3834, -0.3100, -0.2665, -0.3335, -0.4183, -0.1976,
        -0.0592, -0.2931, -0.2080, -0.0040, -0.4264, -0.4207,  0.1856, -0.3655,
        -0.0833, -0.0577, -0.1373, -0.4045, -0.4213, -0.3574, -0.3857, -0.4401,
        -0.3335, -0.3590, -0.2236, -0.4488, -0.3298, -0.3282, -0.1741, -0.3736,
        -0.3857, -0.4630, -0.3574, -0.4326, -0.4044, -0.3857, -0.3992, -0.0592,
        -0.3335, -0.0802,  0.1856, -0.0401, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377])
arg max of tensor([-0.1035, -0.3517, -0.2750,  0.1856, -0.2962, -0.3959, -0.0040, -0.3857,
        -0.0144, -0.2528, -0.3834, -0.3100, -0.2665, -0.3335, -0.4183, -

Tokens embeddings:
tensor([-0.1035, -0.3517, -0.2750,  0.1856, -0.2962, -0.3959, -0.0040, -0.3857,
        -0.0144, -0.2528, -0.3834, -0.3100, -0.2665, -0.3335, -0.4183, -0.1976,
        -0.0592, -0.2931, -0.2080, -0.0040, -0.4264, -0.4207,  0.1856, -0.3655,
        -0.0833, -0.0577, -0.1373, -0.4045, -0.4213, -0.3574, -0.3857, -0.4401,
        -0.3335, -0.3590, -0.2236, -0.4488, -0.3298, -0.3282, -0.1741, -0.3736,
        -0.3857, -0.4630, -0.3574, -0.4326, -0.4044, -0.3857, -0.3992, -0.0592,
        -0.3335, -0.0802,  0.1856, -0.0401, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377])
arg max of tensor([-0.1035, -0.3517, -0.2750,  0.1856, -0.2962, -0.3959, -0.0040, -0.3857,
        -0.0144, -0.2528, -0.3834, -0.3100, -0.2665, -0.3335, -0.4183, -

Tokens embeddings:
tensor([-0.1035, -0.3517, -0.2750,  0.1856, -0.2962, -0.3959, -0.0040, -0.3857,
        -0.0144, -0.2528, -0.3834, -0.3100, -0.2665, -0.3335, -0.4183, -0.1976,
        -0.0592, -0.2931, -0.2080, -0.0040, -0.4264, -0.4207,  0.1856, -0.3655,
        -0.0833, -0.0577, -0.1373, -0.4045, -0.4213, -0.3574, -0.3857, -0.4401,
        -0.3335, -0.3590, -0.2236, -0.4488, -0.3298, -0.3282, -0.1741, -0.3736,
        -0.3857, -0.4630, -0.3574, -0.4326, -0.4044, -0.3857, -0.3992, -0.0592,
        -0.3335, -0.0802,  0.1856, -0.0401, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377])
arg max of tensor([-0.1035, -0.3517, -0.2750,  0.1856, -0.2962, -0.3959, -0.0040, -0.3857,
        -0.0144, -0.2528, -0.3834, -0.3100, -0.2665, -0.3335, -0.4183, -

Tokens embeddings:
tensor([-0.1035, -0.3517, -0.2750,  0.1856, -0.2962, -0.3959, -0.0040, -0.3857,
        -0.0144, -0.2528, -0.3834, -0.3100, -0.2665, -0.3335, -0.4183, -0.1976,
        -0.0592, -0.2931, -0.2080, -0.0040, -0.4264, -0.4207,  0.1856, -0.3655,
        -0.0833, -0.0577, -0.1373, -0.4045, -0.4213, -0.3574, -0.3857, -0.4401,
        -0.3335, -0.3590, -0.2236, -0.4488, -0.3298, -0.3282, -0.1741, -0.3736,
        -0.3857, -0.4630, -0.3574, -0.4326, -0.4044, -0.3857, -0.3992, -0.0592,
        -0.3335, -0.0802,  0.1856, -0.0401, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377])
arg max of tensor([-0.1035, -0.3517, -0.2750,  0.1856, -0.2962, -0.3959, -0.0040, -0.3857,
        -0.0144, -0.2528, -0.3834, -0.3100, -0.2665, -0.3335, -0.4183, -

Tokens embeddings:
tensor([-0.1035, -0.3517, -0.2750,  0.1856, -0.2962, -0.3959, -0.0040, -0.3857,
        -0.0144, -0.2528, -0.3834, -0.3100, -0.2665, -0.3335, -0.4183, -0.1976,
        -0.0592, -0.2931, -0.2080, -0.0040, -0.4264, -0.4207,  0.1856, -0.3655,
        -0.0833, -0.0577, -0.1373, -0.4045, -0.4213, -0.3574, -0.3857, -0.4401,
        -0.3335, -0.3590, -0.2236, -0.4488, -0.3298, -0.3282, -0.1741, -0.3736,
        -0.3857, -0.4630, -0.3574, -0.4326, -0.4044, -0.3857, -0.3992, -0.0592,
        -0.3335, -0.0802,  0.1856, -0.0401, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377])
arg max of tensor([-0.1035, -0.3517, -0.2750,  0.1856, -0.2962, -0.3959, -0.0040, -0.3857,
        -0.0144, -0.2528, -0.3834, -0.3100, -0.2665, -0.3335, -0.4183, -

Tokens embeddings:
tensor([-0.1035, -0.3517, -0.2750,  0.1856, -0.2962, -0.3959, -0.0040, -0.3857,
        -0.0144, -0.2528, -0.3834, -0.3100, -0.2665, -0.3335, -0.4183, -0.1976,
        -0.0592, -0.2931, -0.2080, -0.0040, -0.4264, -0.4207,  0.1856, -0.3655,
        -0.0833, -0.0577, -0.1373, -0.4045, -0.4213, -0.3574, -0.3857, -0.4401,
        -0.3335, -0.3590, -0.2236, -0.4488, -0.3298, -0.3282, -0.1741, -0.3736,
        -0.3857, -0.4630, -0.3574, -0.4326, -0.4044, -0.3857, -0.3992, -0.0592,
        -0.3335, -0.0802,  0.1856, -0.0401, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377])
arg max of tensor([-0.1035, -0.3517, -0.2750,  0.1856, -0.2962, -0.3959, -0.0040, -0.3857,
        -0.0144, -0.2528, -0.3834, -0.3100, -0.2665, -0.3335, -0.4183, -

Tokens embeddings:
tensor([-0.1035, -0.3517, -0.2750,  0.1856, -0.2962, -0.3959, -0.0040, -0.3857,
        -0.0144, -0.2528, -0.3834, -0.3100, -0.2665, -0.3335, -0.4183, -0.1976,
        -0.0592, -0.2931, -0.2080, -0.0040, -0.4264, -0.4207,  0.1856, -0.3655,
        -0.0833, -0.0577, -0.1373, -0.4045, -0.4213, -0.3574, -0.3857, -0.4401,
        -0.3335, -0.3590, -0.2236, -0.4488, -0.3298, -0.3282, -0.1741, -0.3736,
        -0.3857, -0.4630, -0.3574, -0.4326, -0.4044, -0.3857, -0.3992, -0.0592,
        -0.3335, -0.0802,  0.1856, -0.0401, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377])
arg max of tensor([-0.1035, -0.3517, -0.2750,  0.1856, -0.2962, -0.3959, -0.0040, -0.3857,
        -0.0144, -0.2528, -0.3834, -0.3100, -0.2665, -0.3335, -0.4183, -

Tokens embeddings:
tensor([-0.1035, -0.3517, -0.2750,  0.1856, -0.2962, -0.3959, -0.0040, -0.3857,
        -0.0144, -0.2528, -0.3834, -0.3100, -0.2665, -0.3335, -0.4183, -0.1976,
        -0.0592, -0.2931, -0.2080, -0.0040, -0.4264, -0.4207,  0.1856, -0.3655,
        -0.0833, -0.0577, -0.1373, -0.4045, -0.4213, -0.3574, -0.3857, -0.4401,
        -0.3335, -0.3590, -0.2236, -0.4488, -0.3298, -0.3282, -0.1741, -0.3736,
        -0.3857, -0.4630, -0.3574, -0.4326, -0.4044, -0.3857, -0.3992, -0.0592,
        -0.3335, -0.0802,  0.1856, -0.0401, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377])
arg max of tensor([-0.1035, -0.3517, -0.2750,  0.1856, -0.2962, -0.3959, -0.0040, -0.3857,
        -0.0144, -0.2528, -0.3834, -0.3100, -0.2665, -0.3335, -0.4183, -

Tokens embeddings:
tensor([-0.1035, -0.3517, -0.2750,  0.1856, -0.2962, -0.3959, -0.0040, -0.3857,
        -0.0144, -0.2528, -0.3834, -0.3100, -0.2665, -0.3335, -0.4183, -0.1976,
        -0.0592, -0.2931, -0.2080, -0.0040, -0.4264, -0.4207,  0.1856, -0.3655,
        -0.0833, -0.0577, -0.1373, -0.4045, -0.4213, -0.3574, -0.3857, -0.4401,
        -0.3335, -0.3590, -0.2236, -0.4488, -0.3298, -0.3282, -0.1741, -0.3736,
        -0.3857, -0.4630, -0.3574, -0.4326, -0.4044, -0.3857, -0.3992, -0.0592,
        -0.3335, -0.0802,  0.1856, -0.0401, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377])
arg max of tensor([-0.1035, -0.3517, -0.2750,  0.1856, -0.2962, -0.3959, -0.0040, -0.3857,
        -0.0144, -0.2528, -0.3834, -0.3100, -0.2665, -0.3335, -0.4183, -

Tokens embeddings:
tensor([-0.1035, -0.3517, -0.2750,  0.1856, -0.2962, -0.3959, -0.0040, -0.3857,
        -0.0144, -0.2528, -0.3834, -0.3100, -0.2665, -0.3335, -0.4183, -0.1976,
        -0.0592, -0.2931, -0.2080, -0.0040, -0.4264, -0.4207,  0.1856, -0.3655,
        -0.0833, -0.0577, -0.1373, -0.4045, -0.4213, -0.3574, -0.3857, -0.4401,
        -0.3335, -0.3590, -0.2236, -0.4488, -0.3298, -0.3282, -0.1741, -0.3736,
        -0.3857, -0.4630, -0.3574, -0.4326, -0.4044, -0.3857, -0.3992, -0.0592,
        -0.3335, -0.0802,  0.1856, -0.0401, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377])
arg max of tensor([-0.1035, -0.3517, -0.2750,  0.1856, -0.2962, -0.3959, -0.0040, -0.3857,
        -0.0144, -0.2528, -0.3834, -0.3100, -0.2665, -0.3335, -0.4183, -

Tokens embeddings:
tensor([-0.1035, -0.3517, -0.2750,  0.1856, -0.2962, -0.3959, -0.0040, -0.3857,
        -0.0144, -0.2528, -0.3834, -0.3100, -0.2665, -0.3335, -0.4183, -0.1976,
        -0.0592, -0.2931, -0.2080, -0.0040, -0.4264, -0.4207,  0.1856, -0.3655,
        -0.0833, -0.0577, -0.1373, -0.4045, -0.4213, -0.3574, -0.3857, -0.4401,
        -0.3335, -0.3590, -0.2236, -0.4488, -0.3298, -0.3282, -0.1741, -0.3736,
        -0.3857, -0.4630, -0.3574, -0.4326, -0.4044, -0.3857, -0.3992, -0.0592,
        -0.3335, -0.0802,  0.1856, -0.0401, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377])
arg max of tensor([-0.1035, -0.3517, -0.2750,  0.1856, -0.2962, -0.3959, -0.0040, -0.3857,
        -0.0144, -0.2528, -0.3834, -0.3100, -0.2665, -0.3335, -0.4183, -

Tokens embeddings:
tensor([-0.1035, -0.4183, -0.2924, -0.3568, -0.2814, -0.4255, -0.3760, -0.3811,
        -0.3644, -0.3583, -0.4012, -0.3301,  0.1856, -0.0401, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377])
arg max of tensor([-0.1035, -0.4183, -0.2924, -0.3568, -0.2814, -0.4255, -0.3760, -0.3811,
        -0.3644, -0.3583, -0.4012, -0.3301,  0.1856, -0.0401, -0.0377, -

Tokens embeddings:
tensor([-0.1035, -0.4183, -0.2924, -0.3568, -0.2814, -0.4255, -0.3760, -0.3811,
        -0.3644, -0.3583, -0.4012, -0.3301,  0.1856, -0.0401, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377])
arg max of tensor([-0.1035, -0.4183, -0.2924, -0.3568, -0.2814, -0.4255, -0.3760, -0.3811,
        -0.3644, -0.3583, -0.4012, -0.3301,  0.1856, -0.0401, -0.0377, -

Tokens embeddings:
tensor([-0.1035, -0.4183, -0.2924, -0.3568, -0.2814, -0.4255, -0.3760, -0.3811,
        -0.3644, -0.3583, -0.4012, -0.3301,  0.1856, -0.0401, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377])
arg max of tensor([-0.1035, -0.4183, -0.2924, -0.3568, -0.2814, -0.4255, -0.3760, -0.3811,
        -0.3644, -0.3583, -0.4012, -0.3301,  0.1856, -0.0401, -0.0377, -

Tokens embeddings:
tensor([-0.1035, -0.4183, -0.2924, -0.3568, -0.2814, -0.4255, -0.3760, -0.3811,
        -0.3644, -0.3583, -0.4012, -0.3301,  0.1856, -0.0401, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377])
arg max of tensor([-0.1035, -0.4183, -0.2924, -0.3568, -0.2814, -0.4255, -0.3760, -0.3811,
        -0.3644, -0.3583, -0.4012, -0.3301,  0.1856, -0.0401, -0.0377, -

Tokens embeddings:
tensor([-0.1035, -0.4183, -0.2924, -0.3568, -0.2814, -0.4255, -0.3760, -0.3811,
        -0.3644, -0.3583, -0.4012, -0.3301,  0.1856, -0.0401, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377])
arg max of tensor([-0.1035, -0.4183, -0.2924, -0.3568, -0.2814, -0.4255, -0.3760, -0.3811,
        -0.3644, -0.3583, -0.4012, -0.3301,  0.1856, -0.0401, -0.0377, -

Tokens embeddings:
tensor([-0.1035, -0.4183, -0.2924, -0.3568, -0.2814, -0.4255, -0.3760, -0.3811,
        -0.3644, -0.3583, -0.4012, -0.3301,  0.1856, -0.0401, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377])
arg max of tensor([-0.1035, -0.4183, -0.2924, -0.3568, -0.2814, -0.4255, -0.3760, -0.3811,
        -0.3644, -0.3583, -0.4012, -0.3301,  0.1856, -0.0401, -0.0377, -

Tokens embeddings:
tensor([-0.1035, -0.4183, -0.2924, -0.3568, -0.2814, -0.4255, -0.3760, -0.3811,
        -0.3644, -0.3583, -0.4012, -0.3301,  0.1856, -0.0401, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377])
arg max of tensor([-0.1035, -0.4183, -0.2924, -0.3568, -0.2814, -0.4255, -0.3760, -0.3811,
        -0.3644, -0.3583, -0.4012, -0.3301,  0.1856, -0.0401, -0.0377, -

Tokens embeddings:
tensor([-0.1035, -0.4183, -0.2924, -0.3568, -0.2814, -0.4255, -0.3760, -0.3811,
        -0.3644, -0.3583, -0.4012, -0.3301,  0.1856, -0.0401, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377])
arg max of tensor([-0.1035, -0.4183, -0.2924, -0.3568, -0.2814, -0.4255, -0.3760, -0.3811,
        -0.3644, -0.3583, -0.4012, -0.3301,  0.1856, -0.0401, -0.0377, -

Tokens embeddings:
tensor([-0.1035, -0.4183, -0.2924, -0.3568, -0.2814, -0.4255, -0.3760, -0.3811,
        -0.3644, -0.3583, -0.4012, -0.3301,  0.1856, -0.0401, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377])
arg max of tensor([-0.1035, -0.4183, -0.2924, -0.3568, -0.2814, -0.4255, -0.3760, -0.3811,
        -0.3644, -0.3583, -0.4012, -0.3301,  0.1856, -0.0401, -0.0377, -

Tokens embeddings:
tensor([-0.1035, -0.4183, -0.2924, -0.3568, -0.2814, -0.4255, -0.3760, -0.3811,
        -0.3644, -0.3583, -0.4012, -0.3301,  0.1856, -0.0401, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377])
arg max of tensor([-0.1035, -0.4183, -0.2924, -0.3568, -0.2814, -0.4255, -0.3760, -0.3811,
        -0.3644, -0.3583, -0.4012, -0.3301,  0.1856, -0.0401, -0.0377, -

Tokens embeddings:
tensor([-0.1035, -0.4183, -0.2924, -0.3568, -0.2814, -0.4255, -0.3760, -0.3811,
        -0.3644, -0.3583, -0.4012, -0.3301,  0.1856, -0.0401, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377])
arg max of tensor([-0.1035, -0.4183, -0.2924, -0.3568, -0.2814, -0.4255, -0.3760, -0.3811,
        -0.3644, -0.3583, -0.4012, -0.3301,  0.1856, -0.0401, -0.0377, -

Tokens embeddings:
tensor([-0.1035, -0.4183, -0.2924, -0.3568, -0.2814, -0.4255, -0.3760, -0.3811,
        -0.3644, -0.3583, -0.4012, -0.3301,  0.1856, -0.0401, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377])
arg max of tensor([-0.1035, -0.4183, -0.2924, -0.3568, -0.2814, -0.4255, -0.3760, -0.3811,
        -0.3644, -0.3583, -0.4012, -0.3301,  0.1856, -0.0401, -0.0377, -

Tokens embeddings:
tensor([-0.1035, -0.4164, -0.3562, -0.1308, -0.3834, -0.1289, -0.4183, -0.4196,
        -0.2523, -0.0318,  0.0799, -0.4572, -0.3963, -0.3719, -0.3924, -0.5123,
        -0.1895,  0.1856, -0.0401, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377])
arg max of tensor([-0.1035, -0.4164, -0.3562, -0.1308, -0.3834, -0.1289, -0.4183, -0.4196,
        -0.2523, -0.0318,  0.0799, -0.4572, -0.3963, -0.3719, -0.3924, -

Tokens embeddings:
tensor([-0.1035, -0.4164, -0.3562, -0.1308, -0.3834, -0.1289, -0.4183, -0.4196,
        -0.2523, -0.0318,  0.0799, -0.4572, -0.3963, -0.3719, -0.3924, -0.5123,
        -0.1895,  0.1856, -0.0401, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377])
arg max of tensor([-0.1035, -0.4164, -0.3562, -0.1308, -0.3834, -0.1289, -0.4183, -0.4196,
        -0.2523, -0.0318,  0.0799, -0.4572, -0.3963, -0.3719, -0.3924, -

Tokens embeddings:
tensor([-0.1035, -0.4164, -0.3562, -0.1308, -0.3834, -0.1289, -0.4183, -0.4196,
        -0.2523, -0.0318,  0.0799, -0.4572, -0.3963, -0.3719, -0.3924, -0.5123,
        -0.1895,  0.1856, -0.0401, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377])
arg max of tensor([-0.1035, -0.4164, -0.3562, -0.1308, -0.3834, -0.1289, -0.4183, -0.4196,
        -0.2523, -0.0318,  0.0799, -0.4572, -0.3963, -0.3719, -0.3924, -

Tokens embeddings:
tensor([-0.1035, -0.4164, -0.3562, -0.1308, -0.3834, -0.1289, -0.4183, -0.4196,
        -0.2523, -0.0318,  0.0799, -0.4572, -0.3963, -0.3719, -0.3924, -0.5123,
        -0.1895,  0.1856, -0.0401, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377])
arg max of tensor([-0.1035, -0.4164, -0.3562, -0.1308, -0.3834, -0.1289, -0.4183, -0.4196,
        -0.2523, -0.0318,  0.0799, -0.4572, -0.3963, -0.3719, -0.3924, -

Tokens embeddings:
tensor([-0.1035, -0.4164, -0.3562, -0.1308, -0.3834, -0.1289, -0.4183, -0.4196,
        -0.2523, -0.0318,  0.0799, -0.4572, -0.3963, -0.3719, -0.3924, -0.5123,
        -0.1895,  0.1856, -0.0401, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377])
arg max of tensor([-0.1035, -0.4164, -0.3562, -0.1308, -0.3834, -0.1289, -0.4183, -0.4196,
        -0.2523, -0.0318,  0.0799, -0.4572, -0.3963, -0.3719, -0.3924, -

Tokens embeddings:
tensor([-0.1035, -0.4164, -0.3562, -0.1308, -0.3834, -0.1289, -0.4183, -0.4196,
        -0.2523, -0.0318,  0.0799, -0.4572, -0.3963, -0.3719, -0.3924, -0.5123,
        -0.1895,  0.1856, -0.0401, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377])
arg max of tensor([-0.1035, -0.4164, -0.3562, -0.1308, -0.3834, -0.1289, -0.4183, -0.4196,
        -0.2523, -0.0318,  0.0799, -0.4572, -0.3963, -0.3719, -0.3924, -

Tokens embeddings:
tensor([-0.1035, -0.4164, -0.3562, -0.1308, -0.3834, -0.1289, -0.4183, -0.4196,
        -0.2523, -0.0318,  0.0799, -0.4572, -0.3963, -0.3719, -0.3924, -0.5123,
        -0.1895,  0.1856, -0.0401, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377])
arg max of tensor([-0.1035, -0.4164, -0.3562, -0.1308, -0.3834, -0.1289, -0.4183, -0.4196,
        -0.2523, -0.0318,  0.0799, -0.4572, -0.3963, -0.3719, -0.3924, -

Tokens embeddings:
tensor([-0.1035, -0.4164, -0.3562, -0.1308, -0.3834, -0.1289, -0.4183, -0.4196,
        -0.2523, -0.0318,  0.0799, -0.4572, -0.3963, -0.3719, -0.3924, -0.5123,
        -0.1895,  0.1856, -0.0401, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377])
arg max of tensor([-0.1035, -0.4164, -0.3562, -0.1308, -0.3834, -0.1289, -0.4183, -0.4196,
        -0.2523, -0.0318,  0.0799, -0.4572, -0.3963, -0.3719, -0.3924, -

Tokens embeddings:
tensor([-0.1035, -0.4164, -0.3562, -0.1308, -0.3834, -0.1289, -0.4183, -0.4196,
        -0.2523, -0.0318,  0.0799, -0.4572, -0.3963, -0.3719, -0.3924, -0.5123,
        -0.1895,  0.1856, -0.0401, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377])
arg max of tensor([-0.1035, -0.4164, -0.3562, -0.1308, -0.3834, -0.1289, -0.4183, -0.4196,
        -0.2523, -0.0318,  0.0799, -0.4572, -0.3963, -0.3719, -0.3924, -

Tokens embeddings:
tensor([-0.1035, -0.4164, -0.3562, -0.1308, -0.3834, -0.1289, -0.4183, -0.4196,
        -0.2523, -0.0318,  0.0799, -0.4572, -0.3963, -0.3719, -0.3924, -0.5123,
        -0.1895,  0.1856, -0.0401, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377])
arg max of tensor([-0.1035, -0.4164, -0.3562, -0.1308, -0.3834, -0.1289, -0.4183, -0.4196,
        -0.2523, -0.0318,  0.0799, -0.4572, -0.3963, -0.3719, -0.3924, -

Tokens embeddings:
tensor([-0.1035, -0.4164, -0.3562, -0.1308, -0.3834, -0.1289, -0.4183, -0.4196,
        -0.2523, -0.0318,  0.0799, -0.4572, -0.3963, -0.3719, -0.3924, -0.5123,
        -0.1895,  0.1856, -0.0401, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377])
arg max of tensor([-0.1035, -0.4164, -0.3562, -0.1308, -0.3834, -0.1289, -0.4183, -0.4196,
        -0.2523, -0.0318,  0.0799, -0.4572, -0.3963, -0.3719, -0.3924, -

Tokens embeddings:
tensor([-0.1035, -0.4164, -0.3562, -0.1308, -0.3834, -0.1289, -0.4183, -0.4196,
        -0.2523, -0.0318,  0.0799, -0.4572, -0.3963, -0.3719, -0.3924, -0.5123,
        -0.1895,  0.1856, -0.0401, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377])
arg max of tensor([-0.1035, -0.4164, -0.3562, -0.1308, -0.3834, -0.1289, -0.4183, -0.4196,
        -0.2523, -0.0318,  0.0799, -0.4572, -0.3963, -0.3719, -0.3924, -

Tokens embeddings:
tensor([-0.1035, -0.5095, -0.4207,  0.1856, -0.3811, -0.0578, -0.0983, -0.1504,
        -0.3614, -0.0196, -0.4284, -0.0040,  0.0793, -0.4376, -0.4583,  0.0036,
        -0.3834, -0.3100, -0.1448, -0.3753, -0.3574, -0.4183,  0.1088,  0.1856,
        -0.0401, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377])
arg max of tensor([-0.1035, -0.5095, -0.4207,  0.1856, -0.3811, -0.0578, -0.0983, -0.1504,
        -0.3614, -0.0196, -0.4284, -0.0040,  0.0793, -0.4376, -0.4583,  

Tokens embeddings:
tensor([-0.1035, -0.5095, -0.4207,  0.1856, -0.3811, -0.0578, -0.0983, -0.1504,
        -0.3614, -0.0196, -0.4284, -0.0040,  0.0793, -0.4376, -0.4583,  0.0036,
        -0.3834, -0.3100, -0.1448, -0.3753, -0.3574, -0.4183,  0.1088,  0.1856,
        -0.0401, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377])
arg max of tensor([-0.1035, -0.5095, -0.4207,  0.1856, -0.3811, -0.0578, -0.0983, -0.1504,
        -0.3614, -0.0196, -0.4284, -0.0040,  0.0793, -0.4376, -0.4583,  

Tokens embeddings:
tensor([-0.1035, -0.5095, -0.4207,  0.1856, -0.3811, -0.0578, -0.0983, -0.1504,
        -0.3614, -0.0196, -0.4284, -0.0040,  0.0793, -0.4376, -0.4583,  0.0036,
        -0.3834, -0.3100, -0.1448, -0.3753, -0.3574, -0.4183,  0.1088,  0.1856,
        -0.0401, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377])
arg max of tensor([-0.1035, -0.5095, -0.4207,  0.1856, -0.3811, -0.0578, -0.0983, -0.1504,
        -0.3614, -0.0196, -0.4284, -0.0040,  0.0793, -0.4376, -0.4583,  

Tokens embeddings:
tensor([-0.1035, -0.5095, -0.4207,  0.1856, -0.3811, -0.0578, -0.0983, -0.1504,
        -0.3614, -0.0196, -0.4284, -0.0040,  0.0793, -0.4376, -0.4583,  0.0036,
        -0.3834, -0.3100, -0.1448, -0.3753, -0.3574, -0.4183,  0.1088,  0.1856,
        -0.0401, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377])
arg max of tensor([-0.1035, -0.5095, -0.4207,  0.1856, -0.3811, -0.0578, -0.0983, -0.1504,
        -0.3614, -0.0196, -0.4284, -0.0040,  0.0793, -0.4376, -0.4583,  

Tokens embeddings:
tensor([-0.1035, -0.5095, -0.4207,  0.1856, -0.3811, -0.0578, -0.0983, -0.1504,
        -0.3614, -0.0196, -0.4284, -0.0040,  0.0793, -0.4376, -0.4583,  0.0036,
        -0.3834, -0.3100, -0.1448, -0.3753, -0.3574, -0.4183,  0.1088,  0.1856,
        -0.0401, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377])
arg max of tensor([-0.1035, -0.5095, -0.4207,  0.1856, -0.3811, -0.0578, -0.0983, -0.1504,
        -0.3614, -0.0196, -0.4284, -0.0040,  0.0793, -0.4376, -0.4583,  

Tokens embeddings:
tensor([-0.1035, -0.5095, -0.4207,  0.1856, -0.3811, -0.0578, -0.0983, -0.1504,
        -0.3614, -0.0196, -0.4284, -0.0040,  0.0793, -0.4376, -0.4583,  0.0036,
        -0.3834, -0.3100, -0.1448, -0.3753, -0.3574, -0.4183,  0.1088,  0.1856,
        -0.0401, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377])
arg max of tensor([-0.1035, -0.5095, -0.4207,  0.1856, -0.3811, -0.0578, -0.0983, -0.1504,
        -0.3614, -0.0196, -0.4284, -0.0040,  0.0793, -0.4376, -0.4583,  

Tokens embeddings:
tensor([-0.1035, -0.5095, -0.4207,  0.1856, -0.3811, -0.0578, -0.0983, -0.1504,
        -0.3614, -0.0196, -0.4284, -0.0040,  0.0793, -0.4376, -0.4583,  0.0036,
        -0.3834, -0.3100, -0.1448, -0.3753, -0.3574, -0.4183,  0.1088,  0.1856,
        -0.0401, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377])
arg max of tensor([-0.1035, -0.5095, -0.4207,  0.1856, -0.3811, -0.0578, -0.0983, -0.1504,
        -0.3614, -0.0196, -0.4284, -0.0040,  0.0793, -0.4376, -0.4583,  

Tokens embeddings:
tensor([-0.1035, -0.5095, -0.4207,  0.1856, -0.3811, -0.0578, -0.0983, -0.1504,
        -0.3614, -0.0196, -0.4284, -0.0040,  0.0793, -0.4376, -0.4583,  0.0036,
        -0.3834, -0.3100, -0.1448, -0.3753, -0.3574, -0.4183,  0.1088,  0.1856,
        -0.0401, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377])
arg max of tensor([-0.1035, -0.5095, -0.4207,  0.1856, -0.3811, -0.0578, -0.0983, -0.1504,
        -0.3614, -0.0196, -0.4284, -0.0040,  0.0793, -0.4376, -0.4583,  

Tokens embeddings:
tensor([-0.1035, -0.5095, -0.4207,  0.1856, -0.3811, -0.0578, -0.0983, -0.1504,
        -0.3614, -0.0196, -0.4284, -0.0040,  0.0793, -0.4376, -0.4583,  0.0036,
        -0.3834, -0.3100, -0.1448, -0.3753, -0.3574, -0.4183,  0.1088,  0.1856,
        -0.0401, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377])
arg max of tensor([-0.1035, -0.5095, -0.4207,  0.1856, -0.3811, -0.0578, -0.0983, -0.1504,
        -0.3614, -0.0196, -0.4284, -0.0040,  0.0793, -0.4376, -0.4583,  

Tokens embeddings:
tensor([-0.1035, -0.5095, -0.4207,  0.1856, -0.3811, -0.0578, -0.0983, -0.1504,
        -0.3614, -0.0196, -0.4284, -0.0040,  0.0793, -0.4376, -0.4583,  0.0036,
        -0.3834, -0.3100, -0.1448, -0.3753, -0.3574, -0.4183,  0.1088,  0.1856,
        -0.0401, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377])
arg max of tensor([-0.1035, -0.5095, -0.4207,  0.1856, -0.3811, -0.0578, -0.0983, -0.1504,
        -0.3614, -0.0196, -0.4284, -0.0040,  0.0793, -0.4376, -0.4583,  

Tokens embeddings:
tensor([-0.1035, -0.5095, -0.4207,  0.1856, -0.3811, -0.0578, -0.0983, -0.1504,
        -0.3614, -0.0196, -0.4284, -0.0040,  0.0793, -0.4376, -0.4583,  0.0036,
        -0.3834, -0.3100, -0.1448, -0.3753, -0.3574, -0.4183,  0.1088,  0.1856,
        -0.0401, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377])
arg max of tensor([-0.1035, -0.5095, -0.4207,  0.1856, -0.3811, -0.0578, -0.0983, -0.1504,
        -0.3614, -0.0196, -0.4284, -0.0040,  0.0793, -0.4376, -0.4583,  

Tokens embeddings:
tensor([-0.1035, -0.5095, -0.4207,  0.1856, -0.3811, -0.0578, -0.0983, -0.1504,
        -0.3614, -0.0196, -0.4284, -0.0040,  0.0793, -0.4376, -0.4583,  0.0036,
        -0.3834, -0.3100, -0.1448, -0.3753, -0.3574, -0.4183,  0.1088,  0.1856,
        -0.0401, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377])
arg max of tensor([-0.1035, -0.5095, -0.4207,  0.1856, -0.3811, -0.0578, -0.0983, -0.1504,
        -0.3614, -0.0196, -0.4284, -0.0040,  0.0793, -0.4376, -0.4583,  

Tokens embeddings:
tensor([-0.1035, -0.4207,  0.1856, -0.3205, -0.0550, -0.3977, -0.4264, -0.4183,
        -0.1220, -0.3204, -0.4375, -0.1834, -0.3569, -0.1741, -0.1445, -0.3568,
        -0.3901, -0.3901, -0.3555, -0.4350, -0.4058, -0.3958, -0.4348,  0.2137,
        -0.2364, -0.3756, -0.3638, -0.1679, -0.1220, -0.3924, -0.4183, -0.3225,
        -0.3574, -0.3908, -0.4183, -0.0148, -0.3640, -0.4253, -0.3335, -0.3020,
        -0.0040, -0.3834, -0.3834, -0.3298, -0.4264,  0.1856, -0.0401, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377])
arg max of tensor([-0.1035, -0.4207,  0.1856, -0.3205, -0.0550, -0.3977, -0.4264, -0.4183,
        -0.1220, -0.3204, -0.4375, -0.1834, -0.3569, -0.1741, -0.1445, -

Tokens embeddings:
tensor([-0.1035, -0.4207,  0.1856, -0.3205, -0.0550, -0.3977, -0.4264, -0.4183,
        -0.1220, -0.3204, -0.4375, -0.1834, -0.3569, -0.1741, -0.1445, -0.3568,
        -0.3901, -0.3901, -0.3555, -0.4350, -0.4058, -0.3958, -0.4348,  0.2137,
        -0.2364, -0.3756, -0.3638, -0.1679, -0.1220, -0.3924, -0.4183, -0.3225,
        -0.3574, -0.3908, -0.4183, -0.0148, -0.3640, -0.4253, -0.3335, -0.3020,
        -0.0040, -0.3834, -0.3834, -0.3298, -0.4264,  0.1856, -0.0401, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377])
arg max of tensor([-0.1035, -0.4207,  0.1856, -0.3205, -0.0550, -0.3977, -0.4264, -0.4183,
        -0.1220, -0.3204, -0.4375, -0.1834, -0.3569, -0.1741, -0.1445, -

Tokens embeddings:
tensor([-0.1035, -0.4207,  0.1856, -0.3205, -0.0550, -0.3977, -0.4264, -0.4183,
        -0.1220, -0.3204, -0.4375, -0.1834, -0.3569, -0.1741, -0.1445, -0.3568,
        -0.3901, -0.3901, -0.3555, -0.4350, -0.4058, -0.3958, -0.4348,  0.2137,
        -0.2364, -0.3756, -0.3638, -0.1679, -0.1220, -0.3924, -0.4183, -0.3225,
        -0.3574, -0.3908, -0.4183, -0.0148, -0.3640, -0.4253, -0.3335, -0.3020,
        -0.0040, -0.3834, -0.3834, -0.3298, -0.4264,  0.1856, -0.0401, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377])
arg max of tensor([-0.1035, -0.4207,  0.1856, -0.3205, -0.0550, -0.3977, -0.4264, -0.4183,
        -0.1220, -0.3204, -0.4375, -0.1834, -0.3569, -0.1741, -0.1445, -

Tokens embeddings:
tensor([-0.1035, -0.4207,  0.1856, -0.3205, -0.0550, -0.3977, -0.4264, -0.4183,
        -0.1220, -0.3204, -0.4375, -0.1834, -0.3569, -0.1741, -0.1445, -0.3568,
        -0.3901, -0.3901, -0.3555, -0.4350, -0.4058, -0.3958, -0.4348,  0.2137,
        -0.2364, -0.3756, -0.3638, -0.1679, -0.1220, -0.3924, -0.4183, -0.3225,
        -0.3574, -0.3908, -0.4183, -0.0148, -0.3640, -0.4253, -0.3335, -0.3020,
        -0.0040, -0.3834, -0.3834, -0.3298, -0.4264,  0.1856, -0.0401, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377])
arg max of tensor([-0.1035, -0.4207,  0.1856, -0.3205, -0.0550, -0.3977, -0.4264, -0.4183,
        -0.1220, -0.3204, -0.4375, -0.1834, -0.3569, -0.1741, -0.1445, -

Tokens embeddings:
tensor([-0.1035, -0.4207,  0.1856, -0.3205, -0.0550, -0.3977, -0.4264, -0.4183,
        -0.1220, -0.3204, -0.4375, -0.1834, -0.3569, -0.1741, -0.1445, -0.3568,
        -0.3901, -0.3901, -0.3555, -0.4350, -0.4058, -0.3958, -0.4348,  0.2137,
        -0.2364, -0.3756, -0.3638, -0.1679, -0.1220, -0.3924, -0.4183, -0.3225,
        -0.3574, -0.3908, -0.4183, -0.0148, -0.3640, -0.4253, -0.3335, -0.3020,
        -0.0040, -0.3834, -0.3834, -0.3298, -0.4264,  0.1856, -0.0401, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377])
arg max of tensor([-0.1035, -0.4207,  0.1856, -0.3205, -0.0550, -0.3977, -0.4264, -0.4183,
        -0.1220, -0.3204, -0.4375, -0.1834, -0.3569, -0.1741, -0.1445, -

Tokens embeddings:
tensor([-0.1035, -0.4207,  0.1856, -0.3205, -0.0550, -0.3977, -0.4264, -0.4183,
        -0.1220, -0.3204, -0.4375, -0.1834, -0.3569, -0.1741, -0.1445, -0.3568,
        -0.3901, -0.3901, -0.3555, -0.4350, -0.4058, -0.3958, -0.4348,  0.2137,
        -0.2364, -0.3756, -0.3638, -0.1679, -0.1220, -0.3924, -0.4183, -0.3225,
        -0.3574, -0.3908, -0.4183, -0.0148, -0.3640, -0.4253, -0.3335, -0.3020,
        -0.0040, -0.3834, -0.3834, -0.3298, -0.4264,  0.1856, -0.0401, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377])
arg max of tensor([-0.1035, -0.4207,  0.1856, -0.3205, -0.0550, -0.3977, -0.4264, -0.4183,
        -0.1220, -0.3204, -0.4375, -0.1834, -0.3569, -0.1741, -0.1445, -

Tokens embeddings:
tensor([-0.1035, -0.4207,  0.1856, -0.3205, -0.0550, -0.3977, -0.4264, -0.4183,
        -0.1220, -0.3204, -0.4375, -0.1834, -0.3569, -0.1741, -0.1445, -0.3568,
        -0.3901, -0.3901, -0.3555, -0.4350, -0.4058, -0.3958, -0.4348,  0.2137,
        -0.2364, -0.3756, -0.3638, -0.1679, -0.1220, -0.3924, -0.4183, -0.3225,
        -0.3574, -0.3908, -0.4183, -0.0148, -0.3640, -0.4253, -0.3335, -0.3020,
        -0.0040, -0.3834, -0.3834, -0.3298, -0.4264,  0.1856, -0.0401, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377])
arg max of tensor([-0.1035, -0.4207,  0.1856, -0.3205, -0.0550, -0.3977, -0.4264, -0.4183,
        -0.1220, -0.3204, -0.4375, -0.1834, -0.3569, -0.1741, -0.1445, -

Tokens embeddings:
tensor([-0.1035, -0.4207,  0.1856, -0.3205, -0.0550, -0.3977, -0.4264, -0.4183,
        -0.1220, -0.3204, -0.4375, -0.1834, -0.3569, -0.1741, -0.1445, -0.3568,
        -0.3901, -0.3901, -0.3555, -0.4350, -0.4058, -0.3958, -0.4348,  0.2137,
        -0.2364, -0.3756, -0.3638, -0.1679, -0.1220, -0.3924, -0.4183, -0.3225,
        -0.3574, -0.3908, -0.4183, -0.0148, -0.3640, -0.4253, -0.3335, -0.3020,
        -0.0040, -0.3834, -0.3834, -0.3298, -0.4264,  0.1856, -0.0401, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377])
arg max of tensor([-0.1035, -0.4207,  0.1856, -0.3205, -0.0550, -0.3977, -0.4264, -0.4183,
        -0.1220, -0.3204, -0.4375, -0.1834, -0.3569, -0.1741, -0.1445, -

Tokens embeddings:
tensor([-0.1035, -0.4207,  0.1856, -0.3205, -0.0550, -0.3977, -0.4264, -0.4183,
        -0.1220, -0.3204, -0.4375, -0.1834, -0.3569, -0.1741, -0.1445, -0.3568,
        -0.3901, -0.3901, -0.3555, -0.4350, -0.4058, -0.3958, -0.4348,  0.2137,
        -0.2364, -0.3756, -0.3638, -0.1679, -0.1220, -0.3924, -0.4183, -0.3225,
        -0.3574, -0.3908, -0.4183, -0.0148, -0.3640, -0.4253, -0.3335, -0.3020,
        -0.0040, -0.3834, -0.3834, -0.3298, -0.4264,  0.1856, -0.0401, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377])
arg max of tensor([-0.1035, -0.4207,  0.1856, -0.3205, -0.0550, -0.3977, -0.4264, -0.4183,
        -0.1220, -0.3204, -0.4375, -0.1834, -0.3569, -0.1741, -0.1445, -

Tokens embeddings:
tensor([-0.1035, -0.4207,  0.1856, -0.3205, -0.0550, -0.3977, -0.4264, -0.4183,
        -0.1220, -0.3204, -0.4375, -0.1834, -0.3569, -0.1741, -0.1445, -0.3568,
        -0.3901, -0.3901, -0.3555, -0.4350, -0.4058, -0.3958, -0.4348,  0.2137,
        -0.2364, -0.3756, -0.3638, -0.1679, -0.1220, -0.3924, -0.4183, -0.3225,
        -0.3574, -0.3908, -0.4183, -0.0148, -0.3640, -0.4253, -0.3335, -0.3020,
        -0.0040, -0.3834, -0.3834, -0.3298, -0.4264,  0.1856, -0.0401, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377])
arg max of tensor([-0.1035, -0.4207,  0.1856, -0.3205, -0.0550, -0.3977, -0.4264, -0.4183,
        -0.1220, -0.3204, -0.4375, -0.1834, -0.3569, -0.1741, -0.1445, -

Tokens embeddings:
tensor([-0.1035, -0.4207,  0.1856, -0.3205, -0.0550, -0.3977, -0.4264, -0.4183,
        -0.1220, -0.3204, -0.4375, -0.1834, -0.3569, -0.1741, -0.1445, -0.3568,
        -0.3901, -0.3901, -0.3555, -0.4350, -0.4058, -0.3958, -0.4348,  0.2137,
        -0.2364, -0.3756, -0.3638, -0.1679, -0.1220, -0.3924, -0.4183, -0.3225,
        -0.3574, -0.3908, -0.4183, -0.0148, -0.3640, -0.4253, -0.3335, -0.3020,
        -0.0040, -0.3834, -0.3834, -0.3298, -0.4264,  0.1856, -0.0401, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377])
arg max of tensor([-0.1035, -0.4207,  0.1856, -0.3205, -0.0550, -0.3977, -0.4264, -0.4183,
        -0.1220, -0.3204, -0.4375, -0.1834, -0.3569, -0.1741, -0.1445, -

Tokens embeddings:
tensor([-0.1035, -0.4207,  0.1856, -0.3205, -0.0550, -0.3977, -0.4264, -0.4183,
        -0.1220, -0.3204, -0.4375, -0.1834, -0.3569, -0.1741, -0.1445, -0.3568,
        -0.3901, -0.3901, -0.3555, -0.4350, -0.4058, -0.3958, -0.4348,  0.2137,
        -0.2364, -0.3756, -0.3638, -0.1679, -0.1220, -0.3924, -0.4183, -0.3225,
        -0.3574, -0.3908, -0.4183, -0.0148, -0.3640, -0.4253, -0.3335, -0.3020,
        -0.0040, -0.3834, -0.3834, -0.3298, -0.4264,  0.1856, -0.0401, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377])
arg max of tensor([-0.1035, -0.4207,  0.1856, -0.3205, -0.0550, -0.3977, -0.4264, -0.4183,
        -0.1220, -0.3204, -0.4375, -0.1834, -0.3569, -0.1741, -0.1445, -

Tokens embeddings:
tensor([-0.1035, -0.4183, -0.3585, -0.3622, -0.2770, -0.0040, -0.4007,  0.0668,
        -0.0896, -0.1865, -0.4496, -0.3374, -0.4182, -0.3913, -0.0040, -0.2400,
        -0.3257, -0.3228, -0.3701, -0.3272, -0.0448, -0.0040, -0.3877, -0.3630,
        -0.3228,  0.1856, -0.2349, -0.0448, -0.1048,  0.1856, -0.0401, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377])
arg max of tensor([-0.1035, -0.4183, -0.3585, -0.3622, -0.2770, -0.0040, -0.4007,  0.0668,
        -0.0896, -0.1865, -0.4496, -0.3374, -0.4182, -0.3913, -0.0040, -

Tokens embeddings:
tensor([-0.1035, -0.4183, -0.3585, -0.3622, -0.2770, -0.0040, -0.4007,  0.0668,
        -0.0896, -0.1865, -0.4496, -0.3374, -0.4182, -0.3913, -0.0040, -0.2400,
        -0.3257, -0.3228, -0.3701, -0.3272, -0.0448, -0.0040, -0.3877, -0.3630,
        -0.3228,  0.1856, -0.2349, -0.0448, -0.1048,  0.1856, -0.0401, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377])
arg max of tensor([-0.1035, -0.4183, -0.3585, -0.3622, -0.2770, -0.0040, -0.4007,  0.0668,
        -0.0896, -0.1865, -0.4496, -0.3374, -0.4182, -0.3913, -0.0040, -

Tokens embeddings:
tensor([-0.1035, -0.4183, -0.3585, -0.3622, -0.2770, -0.0040, -0.4007,  0.0668,
        -0.0896, -0.1865, -0.4496, -0.3374, -0.4182, -0.3913, -0.0040, -0.2400,
        -0.3257, -0.3228, -0.3701, -0.3272, -0.0448, -0.0040, -0.3877, -0.3630,
        -0.3228,  0.1856, -0.2349, -0.0448, -0.1048,  0.1856, -0.0401, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377])
arg max of tensor([-0.1035, -0.4183, -0.3585, -0.3622, -0.2770, -0.0040, -0.4007,  0.0668,
        -0.0896, -0.1865, -0.4496, -0.3374, -0.4182, -0.3913, -0.0040, -

Tokens embeddings:
tensor([-0.1035, -0.4183, -0.3585, -0.3622, -0.2770, -0.0040, -0.4007,  0.0668,
        -0.0896, -0.1865, -0.4496, -0.3374, -0.4182, -0.3913, -0.0040, -0.2400,
        -0.3257, -0.3228, -0.3701, -0.3272, -0.0448, -0.0040, -0.3877, -0.3630,
        -0.3228,  0.1856, -0.2349, -0.0448, -0.1048,  0.1856, -0.0401, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377])
arg max of tensor([-0.1035, -0.4183, -0.3585, -0.3622, -0.2770, -0.0040, -0.4007,  0.0668,
        -0.0896, -0.1865, -0.4496, -0.3374, -0.4182, -0.3913, -0.0040, -

Tokens embeddings:
tensor([-0.1035, -0.4183, -0.3585, -0.3622, -0.2770, -0.0040, -0.4007,  0.0668,
        -0.0896, -0.1865, -0.4496, -0.3374, -0.4182, -0.3913, -0.0040, -0.2400,
        -0.3257, -0.3228, -0.3701, -0.3272, -0.0448, -0.0040, -0.3877, -0.3630,
        -0.3228,  0.1856, -0.2349, -0.0448, -0.1048,  0.1856, -0.0401, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377])
arg max of tensor([-0.1035, -0.4183, -0.3585, -0.3622, -0.2770, -0.0040, -0.4007,  0.0668,
        -0.0896, -0.1865, -0.4496, -0.3374, -0.4182, -0.3913, -0.0040, -

Tokens embeddings:
tensor([-0.1035, -0.4183, -0.3585, -0.3622, -0.2770, -0.0040, -0.4007,  0.0668,
        -0.0896, -0.1865, -0.4496, -0.3374, -0.4182, -0.3913, -0.0040, -0.2400,
        -0.3257, -0.3228, -0.3701, -0.3272, -0.0448, -0.0040, -0.3877, -0.3630,
        -0.3228,  0.1856, -0.2349, -0.0448, -0.1048,  0.1856, -0.0401, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377])
arg max of tensor([-0.1035, -0.4183, -0.3585, -0.3622, -0.2770, -0.0040, -0.4007,  0.0668,
        -0.0896, -0.1865, -0.4496, -0.3374, -0.4182, -0.3913, -0.0040, -

Tokens embeddings:
tensor([-0.1035, -0.4183, -0.3585, -0.3622, -0.2770, -0.0040, -0.4007,  0.0668,
        -0.0896, -0.1865, -0.4496, -0.3374, -0.4182, -0.3913, -0.0040, -0.2400,
        -0.3257, -0.3228, -0.3701, -0.3272, -0.0448, -0.0040, -0.3877, -0.3630,
        -0.3228,  0.1856, -0.2349, -0.0448, -0.1048,  0.1856, -0.0401, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377])
arg max of tensor([-0.1035, -0.4183, -0.3585, -0.3622, -0.2770, -0.0040, -0.4007,  0.0668,
        -0.0896, -0.1865, -0.4496, -0.3374, -0.4182, -0.3913, -0.0040, -

Tokens embeddings:
tensor([-0.1035, -0.4183, -0.3585, -0.3622, -0.2770, -0.0040, -0.4007,  0.0668,
        -0.0896, -0.1865, -0.4496, -0.3374, -0.4182, -0.3913, -0.0040, -0.2400,
        -0.3257, -0.3228, -0.3701, -0.3272, -0.0448, -0.0040, -0.3877, -0.3630,
        -0.3228,  0.1856, -0.2349, -0.0448, -0.1048,  0.1856, -0.0401, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377])
arg max of tensor([-0.1035, -0.4183, -0.3585, -0.3622, -0.2770, -0.0040, -0.4007,  0.0668,
        -0.0896, -0.1865, -0.4496, -0.3374, -0.4182, -0.3913, -0.0040, -

Tokens embeddings:
tensor([-0.1035, -0.4183, -0.3585, -0.3622, -0.2770, -0.0040, -0.4007,  0.0668,
        -0.0896, -0.1865, -0.4496, -0.3374, -0.4182, -0.3913, -0.0040, -0.2400,
        -0.3257, -0.3228, -0.3701, -0.3272, -0.0448, -0.0040, -0.3877, -0.3630,
        -0.3228,  0.1856, -0.2349, -0.0448, -0.1048,  0.1856, -0.0401, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377])
arg max of tensor([-0.1035, -0.4183, -0.3585, -0.3622, -0.2770, -0.0040, -0.4007,  0.0668,
        -0.0896, -0.1865, -0.4496, -0.3374, -0.4182, -0.3913, -0.0040, -

Tokens embeddings:
tensor([-0.1035, -0.4183, -0.3585, -0.3622, -0.2770, -0.0040, -0.4007,  0.0668,
        -0.0896, -0.1865, -0.4496, -0.3374, -0.4182, -0.3913, -0.0040, -0.2400,
        -0.3257, -0.3228, -0.3701, -0.3272, -0.0448, -0.0040, -0.3877, -0.3630,
        -0.3228,  0.1856, -0.2349, -0.0448, -0.1048,  0.1856, -0.0401, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377])
arg max of tensor([-0.1035, -0.4183, -0.3585, -0.3622, -0.2770, -0.0040, -0.4007,  0.0668,
        -0.0896, -0.1865, -0.4496, -0.3374, -0.4182, -0.3913, -0.0040, -

Tokens embeddings:
tensor([-0.1035, -0.4183, -0.3585, -0.3622, -0.2770, -0.0040, -0.4007,  0.0668,
        -0.0896, -0.1865, -0.4496, -0.3374, -0.4182, -0.3913, -0.0040, -0.2400,
        -0.3257, -0.3228, -0.3701, -0.3272, -0.0448, -0.0040, -0.3877, -0.3630,
        -0.3228,  0.1856, -0.2349, -0.0448, -0.1048,  0.1856, -0.0401, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377])
arg max of tensor([-0.1035, -0.4183, -0.3585, -0.3622, -0.2770, -0.0040, -0.4007,  0.0668,
        -0.0896, -0.1865, -0.4496, -0.3374, -0.4182, -0.3913, -0.0040, -

Tokens embeddings:
tensor([-0.1035, -0.4183, -0.3585, -0.3622, -0.2770, -0.0040, -0.4007,  0.0668,
        -0.0896, -0.1865, -0.4496, -0.3374, -0.4182, -0.3913, -0.0040, -0.2400,
        -0.3257, -0.3228, -0.3701, -0.3272, -0.0448, -0.0040, -0.3877, -0.3630,
        -0.3228,  0.1856, -0.2349, -0.0448, -0.1048,  0.1856, -0.0401, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377])
arg max of tensor([-0.1035, -0.4183, -0.3585, -0.3622, -0.2770, -0.0040, -0.4007,  0.0668,
        -0.0896, -0.1865, -0.4496, -0.3374, -0.4182, -0.3913, -0.0040, -

Tokens embeddings:
tensor([-0.1035, -0.4317, -0.2303, -0.4551, -0.3944, -0.3067, -0.3630, -0.4183,
        -0.1992,  0.1856, -0.0401, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377])
arg max of tensor([-0.1035, -0.4317, -0.2303, -0.4551, -0.3944, -0.3067, -0.3630, -0.4183,
        -0.1992,  0.1856, -0.0401, -0.0377, -0.0377, -0.0377, -0.0377, -

Tokens embeddings:
tensor([-0.1035, -0.4317, -0.2303, -0.4551, -0.3944, -0.3067, -0.3630, -0.4183,
        -0.1992,  0.1856, -0.0401, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377])
arg max of tensor([-0.1035, -0.4317, -0.2303, -0.4551, -0.3944, -0.3067, -0.3630, -0.4183,
        -0.1992,  0.1856, -0.0401, -0.0377, -0.0377, -0.0377, -0.0377, -

Tokens embeddings:
tensor([-0.1035, -0.4317, -0.2303, -0.4551, -0.3944, -0.3067, -0.3630, -0.4183,
        -0.1992,  0.1856, -0.0401, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377])
arg max of tensor([-0.1035, -0.4317, -0.2303, -0.4551, -0.3944, -0.3067, -0.3630, -0.4183,
        -0.1992,  0.1856, -0.0401, -0.0377, -0.0377, -0.0377, -0.0377, -

Tokens embeddings:
tensor([-0.1035, -0.4317, -0.2303, -0.4551, -0.3944, -0.3067, -0.3630, -0.4183,
        -0.1992,  0.1856, -0.0401, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377])
arg max of tensor([-0.1035, -0.4317, -0.2303, -0.4551, -0.3944, -0.3067, -0.3630, -0.4183,
        -0.1992,  0.1856, -0.0401, -0.0377, -0.0377, -0.0377, -0.0377, -

Tokens embeddings:
tensor([-0.1035, -0.4317, -0.2303, -0.4551, -0.3944, -0.3067, -0.3630, -0.4183,
        -0.1992,  0.1856, -0.0401, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377])
arg max of tensor([-0.1035, -0.4317, -0.2303, -0.4551, -0.3944, -0.3067, -0.3630, -0.4183,
        -0.1992,  0.1856, -0.0401, -0.0377, -0.0377, -0.0377, -0.0377, -

Tokens embeddings:
tensor([-0.1035, -0.4317, -0.2303, -0.4551, -0.3944, -0.3067, -0.3630, -0.4183,
        -0.1992,  0.1856, -0.0401, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377])
arg max of tensor([-0.1035, -0.4317, -0.2303, -0.4551, -0.3944, -0.3067, -0.3630, -0.4183,
        -0.1992,  0.1856, -0.0401, -0.0377, -0.0377, -0.0377, -0.0377, -

Tokens embeddings:
tensor([-0.1035, -0.4317, -0.2303, -0.4551, -0.3944, -0.3067, -0.3630, -0.4183,
        -0.1992,  0.1856, -0.0401, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377])
arg max of tensor([-0.1035, -0.4317, -0.2303, -0.4551, -0.3944, -0.3067, -0.3630, -0.4183,
        -0.1992,  0.1856, -0.0401, -0.0377, -0.0377, -0.0377, -0.0377, -

Tokens embeddings:
tensor([-0.1035, -0.4317, -0.2303, -0.4551, -0.3944, -0.3067, -0.3630, -0.4183,
        -0.1992,  0.1856, -0.0401, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377])
arg max of tensor([-0.1035, -0.4317, -0.2303, -0.4551, -0.3944, -0.3067, -0.3630, -0.4183,
        -0.1992,  0.1856, -0.0401, -0.0377, -0.0377, -0.0377, -0.0377, -

Tokens embeddings:
tensor([-0.1035, -0.4317, -0.2303, -0.4551, -0.3944, -0.3067, -0.3630, -0.4183,
        -0.1992,  0.1856, -0.0401, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377])
arg max of tensor([-0.1035, -0.4317, -0.2303, -0.4551, -0.3944, -0.3067, -0.3630, -0.4183,
        -0.1992,  0.1856, -0.0401, -0.0377, -0.0377, -0.0377, -0.0377, -

Tokens embeddings:
tensor([-0.1035, -0.4317, -0.2303, -0.4551, -0.3944, -0.3067, -0.3630, -0.4183,
        -0.1992,  0.1856, -0.0401, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377])
arg max of tensor([-0.1035, -0.4317, -0.2303, -0.4551, -0.3944, -0.3067, -0.3630, -0.4183,
        -0.1992,  0.1856, -0.0401, -0.0377, -0.0377, -0.0377, -0.0377, -

Tokens embeddings:
tensor([-0.1035, -0.4317, -0.2303, -0.4551, -0.3944, -0.3067, -0.3630, -0.4183,
        -0.1992,  0.1856, -0.0401, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377])
arg max of tensor([-0.1035, -0.4317, -0.2303, -0.4551, -0.3944, -0.3067, -0.3630, -0.4183,
        -0.1992,  0.1856, -0.0401, -0.0377, -0.0377, -0.0377, -0.0377, -

Tokens embeddings:
tensor([-0.1035, -0.4317, -0.2303, -0.4551, -0.3944, -0.3067, -0.3630, -0.4183,
        -0.1992,  0.1856, -0.0401, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377])
arg max of tensor([-0.1035, -0.4317, -0.2303, -0.4551, -0.3944, -0.3067, -0.3630, -0.4183,
        -0.1992,  0.1856, -0.0401, -0.0377, -0.0377, -0.0377, -0.0377, -

Tokens embeddings:
tensor([-0.1035, -0.3901, -0.3901, -0.3857, -0.4186, -0.2448, -0.3335, -0.2235,
        -0.3755, -0.3924, -0.3857, -0.4696, -0.3373, -0.4853, -0.4157, -0.0588,
        -0.3226, -0.4183,  0.2349, -0.2364, -0.2990, -0.3020, -0.3622, -0.3877,
        -0.0040, -0.3834, -0.3834, -0.3298, -0.3869,  0.1856, -0.0401, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377])
arg max of tensor([-0.1035, -0.3901, -0.3901, -0.3857, -0.4186, -0.2448, -0.3335, -0.2235,
        -0.3755, -0.3924, -0.3857, -0.4696, -0.3373, -0.4853, -0.4157, -

Tokens embeddings:
tensor([-0.1035, -0.3901, -0.3901, -0.3857, -0.4186, -0.2448, -0.3335, -0.2235,
        -0.3755, -0.3924, -0.3857, -0.4696, -0.3373, -0.4853, -0.4157, -0.0588,
        -0.3226, -0.4183,  0.2349, -0.2364, -0.2990, -0.3020, -0.3622, -0.3877,
        -0.0040, -0.3834, -0.3834, -0.3298, -0.3869,  0.1856, -0.0401, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377])
arg max of tensor([-0.1035, -0.3901, -0.3901, -0.3857, -0.4186, -0.2448, -0.3335, -0.2235,
        -0.3755, -0.3924, -0.3857, -0.4696, -0.3373, -0.4853, -0.4157, -

Tokens embeddings:
tensor([-0.1035, -0.3901, -0.3901, -0.3857, -0.4186, -0.2448, -0.3335, -0.2235,
        -0.3755, -0.3924, -0.3857, -0.4696, -0.3373, -0.4853, -0.4157, -0.0588,
        -0.3226, -0.4183,  0.2349, -0.2364, -0.2990, -0.3020, -0.3622, -0.3877,
        -0.0040, -0.3834, -0.3834, -0.3298, -0.3869,  0.1856, -0.0401, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377])
arg max of tensor([-0.1035, -0.3901, -0.3901, -0.3857, -0.4186, -0.2448, -0.3335, -0.2235,
        -0.3755, -0.3924, -0.3857, -0.4696, -0.3373, -0.4853, -0.4157, -

Tokens embeddings:
tensor([-0.1035, -0.3901, -0.3901, -0.3857, -0.4186, -0.2448, -0.3335, -0.2235,
        -0.3755, -0.3924, -0.3857, -0.4696, -0.3373, -0.4853, -0.4157, -0.0588,
        -0.3226, -0.4183,  0.2349, -0.2364, -0.2990, -0.3020, -0.3622, -0.3877,
        -0.0040, -0.3834, -0.3834, -0.3298, -0.3869,  0.1856, -0.0401, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377])
arg max of tensor([-0.1035, -0.3901, -0.3901, -0.3857, -0.4186, -0.2448, -0.3335, -0.2235,
        -0.3755, -0.3924, -0.3857, -0.4696, -0.3373, -0.4853, -0.4157, -

Tokens embeddings:
tensor([-0.1035, -0.3901, -0.3901, -0.3857, -0.4186, -0.2448, -0.3335, -0.2235,
        -0.3755, -0.3924, -0.3857, -0.4696, -0.3373, -0.4853, -0.4157, -0.0588,
        -0.3226, -0.4183,  0.2349, -0.2364, -0.2990, -0.3020, -0.3622, -0.3877,
        -0.0040, -0.3834, -0.3834, -0.3298, -0.3869,  0.1856, -0.0401, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377])
arg max of tensor([-0.1035, -0.3901, -0.3901, -0.3857, -0.4186, -0.2448, -0.3335, -0.2235,
        -0.3755, -0.3924, -0.3857, -0.4696, -0.3373, -0.4853, -0.4157, -

Tokens embeddings:
tensor([-0.1035, -0.3901, -0.3901, -0.3857, -0.4186, -0.2448, -0.3335, -0.2235,
        -0.3755, -0.3924, -0.3857, -0.4696, -0.3373, -0.4853, -0.4157, -0.0588,
        -0.3226, -0.4183,  0.2349, -0.2364, -0.2990, -0.3020, -0.3622, -0.3877,
        -0.0040, -0.3834, -0.3834, -0.3298, -0.3869,  0.1856, -0.0401, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377])
arg max of tensor([-0.1035, -0.3901, -0.3901, -0.3857, -0.4186, -0.2448, -0.3335, -0.2235,
        -0.3755, -0.3924, -0.3857, -0.4696, -0.3373, -0.4853, -0.4157, -

Tokens embeddings:
tensor([-0.1035, -0.3901, -0.3901, -0.3857, -0.4186, -0.2448, -0.3335, -0.2235,
        -0.3755, -0.3924, -0.3857, -0.4696, -0.3373, -0.4853, -0.4157, -0.0588,
        -0.3226, -0.4183,  0.2349, -0.2364, -0.2990, -0.3020, -0.3622, -0.3877,
        -0.0040, -0.3834, -0.3834, -0.3298, -0.3869,  0.1856, -0.0401, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377])
arg max of tensor([-0.1035, -0.3901, -0.3901, -0.3857, -0.4186, -0.2448, -0.3335, -0.2235,
        -0.3755, -0.3924, -0.3857, -0.4696, -0.3373, -0.4853, -0.4157, -

Tokens embeddings:
tensor([-0.1035, -0.3901, -0.3901, -0.3857, -0.4186, -0.2448, -0.3335, -0.2235,
        -0.3755, -0.3924, -0.3857, -0.4696, -0.3373, -0.4853, -0.4157, -0.0588,
        -0.3226, -0.4183,  0.2349, -0.2364, -0.2990, -0.3020, -0.3622, -0.3877,
        -0.0040, -0.3834, -0.3834, -0.3298, -0.3869,  0.1856, -0.0401, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377])
arg max of tensor([-0.1035, -0.3901, -0.3901, -0.3857, -0.4186, -0.2448, -0.3335, -0.2235,
        -0.3755, -0.3924, -0.3857, -0.4696, -0.3373, -0.4853, -0.4157, -

Tokens embeddings:
tensor([-0.1035, -0.3901, -0.3901, -0.3857, -0.4186, -0.2448, -0.3335, -0.2235,
        -0.3755, -0.3924, -0.3857, -0.4696, -0.3373, -0.4853, -0.4157, -0.0588,
        -0.3226, -0.4183,  0.2349, -0.2364, -0.2990, -0.3020, -0.3622, -0.3877,
        -0.0040, -0.3834, -0.3834, -0.3298, -0.3869,  0.1856, -0.0401, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377])
arg max of tensor([-0.1035, -0.3901, -0.3901, -0.3857, -0.4186, -0.2448, -0.3335, -0.2235,
        -0.3755, -0.3924, -0.3857, -0.4696, -0.3373, -0.4853, -0.4157, -

Tokens embeddings:
tensor([-0.1035, -0.3901, -0.3901, -0.3857, -0.4186, -0.2448, -0.3335, -0.2235,
        -0.3755, -0.3924, -0.3857, -0.4696, -0.3373, -0.4853, -0.4157, -0.0588,
        -0.3226, -0.4183,  0.2349, -0.2364, -0.2990, -0.3020, -0.3622, -0.3877,
        -0.0040, -0.3834, -0.3834, -0.3298, -0.3869,  0.1856, -0.0401, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377])
arg max of tensor([-0.1035, -0.3901, -0.3901, -0.3857, -0.4186, -0.2448, -0.3335, -0.2235,
        -0.3755, -0.3924, -0.3857, -0.4696, -0.3373, -0.4853, -0.4157, -

Tokens embeddings:
tensor([-0.1035, -0.3901, -0.3901, -0.3857, -0.4186, -0.2448, -0.3335, -0.2235,
        -0.3755, -0.3924, -0.3857, -0.4696, -0.3373, -0.4853, -0.4157, -0.0588,
        -0.3226, -0.4183,  0.2349, -0.2364, -0.2990, -0.3020, -0.3622, -0.3877,
        -0.0040, -0.3834, -0.3834, -0.3298, -0.3869,  0.1856, -0.0401, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377])
arg max of tensor([-0.1035, -0.3901, -0.3901, -0.3857, -0.4186, -0.2448, -0.3335, -0.2235,
        -0.3755, -0.3924, -0.3857, -0.4696, -0.3373, -0.4853, -0.4157, -

Tokens embeddings:
tensor([-0.1035, -0.3901, -0.3901, -0.3857, -0.4186, -0.2448, -0.3335, -0.2235,
        -0.3755, -0.3924, -0.3857, -0.4696, -0.3373, -0.4853, -0.4157, -0.0588,
        -0.3226, -0.4183,  0.2349, -0.2364, -0.2990, -0.3020, -0.3622, -0.3877,
        -0.0040, -0.3834, -0.3834, -0.3298, -0.3869,  0.1856, -0.0401, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377])
arg max of tensor([-0.1035, -0.3901, -0.3901, -0.3857, -0.4186, -0.2448, -0.3335, -0.2235,
        -0.3755, -0.3924, -0.3857, -0.4696, -0.3373, -0.4853, -0.4157, -

In [33]:
# Max Pooling for all tokens in sentences and argmax

# Select some sentences randomly
sent_index = [11, 12, 13, 14, 15, 16, 17, 18, 19, 20]

for s in sent_index:
  print("*" * 100)
  # Get the sentence's words
  tokens = test_inputs[s]
  # For each layer...
  for l in range(12):
    print("\nLayer", l+1)
    attention = np.squeeze(test_attentions[s][l].detach().cpu().numpy(), axis=0)
    # and for each head
    #for h, head in enumerate(attention):
    #print("Head", h+1)
    # Get the attention for the cls token
    encoded_tokens = bert_tokenizer(tokens, padding=True, truncation=True, max_length=128, return_tensors='pt')
    encoded_tokens = encoded_tokens.to(device)
    with torch.no_grad():
      model_output1 = model_e(**encoded_tokens)
      tokens_embeddings = max_pooling(model_output1, encoded_tokens['attention_mask'])
      tokens_embeddings = tokens_embeddings.cpu()
      cls_attentions = head[0]
      display(HTML(colorize(tokens, cls_attentions)))
      print("Tokens embeddings:")
      print(tokens_embeddings)
      arg = argmax(tokens_embeddings)
      print('arg max of %s: %d' % (tokens_embeddings, arg))

****************************************************************************************************

Layer 1


Tokens embeddings:
tensor([-1.0346e-01, -5.9233e-02, -1.0000e+09, -2.1756e-02, -1.2673e-01,
        -1.0000e+09, -1.1936e-01, -2.1925e-01, -1.0000e+09, -1.4316e-01,
        -2.9110e-01, -1.0000e+09, -1.4075e-01,  1.2046e-01, -1.0000e+09,
        -1.4277e-01, -2.3636e-01, -1.0000e+09, -1.0019e-01, -3.7066e-01,
        -1.0000e+09, -9.5430e-02, -3.8573e-01, -1.0000e+09, -5.8018e-02,
        -8.9630e-02, -1.0000e+09, -1.4610e-01,  1.8556e-01, -1.0000e+09,
        -1.2252e-01, -1.5364e-01, -2.2685e-01, -1.2514e-01, -3.5331e-01,
        -1.0000e+09, -9.3270e-02, -2.5827e-01, -1.0000e+09, -4.3532e-02,
        -1.7413e-01, -1.0000e+09, -2.1756e-02, -1.9518e-01, -1.0000e+09,
        -1.2093e-01,  4.3223e-02, -1.0000e+09, -1.1936e-01, -2.3187e-01,
        -1.0000e+09, -4.0138e-02, -3.7658e-02, -1.0000e+09, -3.7658e-02,
        -3.7658e-02, -1.0000e+09, -3.7658e-02, -3.7658e-02, -1.0000e+09,
        -3.7658e-02, -3.7658e-02, -1.0000e+09, -3.7658e-02, -3.7658e-02,
        -1.0000e+09, -3.7658e-02

Tokens embeddings:
tensor([-1.0346e-01, -5.9233e-02, -1.0000e+09, -2.1756e-02, -1.2673e-01,
        -1.0000e+09, -1.1936e-01, -2.1925e-01, -1.0000e+09, -1.4316e-01,
        -2.9110e-01, -1.0000e+09, -1.4075e-01,  1.2046e-01, -1.0000e+09,
        -1.4277e-01, -2.3636e-01, -1.0000e+09, -1.0019e-01, -3.7066e-01,
        -1.0000e+09, -9.5430e-02, -3.8573e-01, -1.0000e+09, -5.8018e-02,
        -8.9630e-02, -1.0000e+09, -1.4610e-01,  1.8556e-01, -1.0000e+09,
        -1.2252e-01, -1.5364e-01, -2.2685e-01, -1.2514e-01, -3.5331e-01,
        -1.0000e+09, -9.3270e-02, -2.5827e-01, -1.0000e+09, -4.3532e-02,
        -1.7413e-01, -1.0000e+09, -2.1756e-02, -1.9518e-01, -1.0000e+09,
        -1.2093e-01,  4.3223e-02, -1.0000e+09, -1.1936e-01, -2.3187e-01,
        -1.0000e+09, -4.0138e-02, -3.7658e-02, -1.0000e+09, -3.7658e-02,
        -3.7658e-02, -1.0000e+09, -3.7658e-02, -3.7658e-02, -1.0000e+09,
        -3.7658e-02, -3.7658e-02, -1.0000e+09, -3.7658e-02, -3.7658e-02,
        -1.0000e+09, -3.7658e-02

Tokens embeddings:
tensor([-1.0346e-01, -5.9233e-02, -1.0000e+09, -2.1756e-02, -1.2673e-01,
        -1.0000e+09, -1.1936e-01, -2.1925e-01, -1.0000e+09, -1.4316e-01,
        -2.9110e-01, -1.0000e+09, -1.4075e-01,  1.2046e-01, -1.0000e+09,
        -1.4277e-01, -2.3636e-01, -1.0000e+09, -1.0019e-01, -3.7066e-01,
        -1.0000e+09, -9.5430e-02, -3.8573e-01, -1.0000e+09, -5.8018e-02,
        -8.9630e-02, -1.0000e+09, -1.4610e-01,  1.8556e-01, -1.0000e+09,
        -1.2252e-01, -1.5364e-01, -2.2685e-01, -1.2514e-01, -3.5331e-01,
        -1.0000e+09, -9.3270e-02, -2.5827e-01, -1.0000e+09, -4.3532e-02,
        -1.7413e-01, -1.0000e+09, -2.1756e-02, -1.9518e-01, -1.0000e+09,
        -1.2093e-01,  4.3223e-02, -1.0000e+09, -1.1936e-01, -2.3187e-01,
        -1.0000e+09, -4.0138e-02, -3.7658e-02, -1.0000e+09, -3.7658e-02,
        -3.7658e-02, -1.0000e+09, -3.7658e-02, -3.7658e-02, -1.0000e+09,
        -3.7658e-02, -3.7658e-02, -1.0000e+09, -3.7658e-02, -3.7658e-02,
        -1.0000e+09, -3.7658e-02

Tokens embeddings:
tensor([-1.0346e-01, -5.9233e-02, -1.0000e+09, -2.1756e-02, -1.2673e-01,
        -1.0000e+09, -1.1936e-01, -2.1925e-01, -1.0000e+09, -1.4316e-01,
        -2.9110e-01, -1.0000e+09, -1.4075e-01,  1.2046e-01, -1.0000e+09,
        -1.4277e-01, -2.3636e-01, -1.0000e+09, -1.0019e-01, -3.7066e-01,
        -1.0000e+09, -9.5430e-02, -3.8573e-01, -1.0000e+09, -5.8018e-02,
        -8.9630e-02, -1.0000e+09, -1.4610e-01,  1.8556e-01, -1.0000e+09,
        -1.2252e-01, -1.5364e-01, -2.2685e-01, -1.2514e-01, -3.5331e-01,
        -1.0000e+09, -9.3270e-02, -2.5827e-01, -1.0000e+09, -4.3532e-02,
        -1.7413e-01, -1.0000e+09, -2.1756e-02, -1.9518e-01, -1.0000e+09,
        -1.2093e-01,  4.3223e-02, -1.0000e+09, -1.1936e-01, -2.3187e-01,
        -1.0000e+09, -4.0138e-02, -3.7658e-02, -1.0000e+09, -3.7658e-02,
        -3.7658e-02, -1.0000e+09, -3.7658e-02, -3.7658e-02, -1.0000e+09,
        -3.7658e-02, -3.7658e-02, -1.0000e+09, -3.7658e-02, -3.7658e-02,
        -1.0000e+09, -3.7658e-02

Tokens embeddings:
tensor([-1.0346e-01, -5.9233e-02, -1.0000e+09, -2.1756e-02, -1.2673e-01,
        -1.0000e+09, -1.1936e-01, -2.1925e-01, -1.0000e+09, -1.4316e-01,
        -2.9110e-01, -1.0000e+09, -1.4075e-01,  1.2046e-01, -1.0000e+09,
        -1.4277e-01, -2.3636e-01, -1.0000e+09, -1.0019e-01, -3.7066e-01,
        -1.0000e+09, -9.5430e-02, -3.8573e-01, -1.0000e+09, -5.8018e-02,
        -8.9630e-02, -1.0000e+09, -1.4610e-01,  1.8556e-01, -1.0000e+09,
        -1.2252e-01, -1.5364e-01, -2.2685e-01, -1.2514e-01, -3.5331e-01,
        -1.0000e+09, -9.3270e-02, -2.5827e-01, -1.0000e+09, -4.3532e-02,
        -1.7413e-01, -1.0000e+09, -2.1756e-02, -1.9518e-01, -1.0000e+09,
        -1.2093e-01,  4.3223e-02, -1.0000e+09, -1.1936e-01, -2.3187e-01,
        -1.0000e+09, -4.0138e-02, -3.7658e-02, -1.0000e+09, -3.7658e-02,
        -3.7658e-02, -1.0000e+09, -3.7658e-02, -3.7658e-02, -1.0000e+09,
        -3.7658e-02, -3.7658e-02, -1.0000e+09, -3.7658e-02, -3.7658e-02,
        -1.0000e+09, -3.7658e-02

Tokens embeddings:
tensor([-1.0346e-01, -5.9233e-02, -1.0000e+09, -2.1756e-02, -1.2673e-01,
        -1.0000e+09, -1.1936e-01, -2.1925e-01, -1.0000e+09, -1.4316e-01,
        -2.9110e-01, -1.0000e+09, -1.4075e-01,  1.2046e-01, -1.0000e+09,
        -1.4277e-01, -2.3636e-01, -1.0000e+09, -1.0019e-01, -3.7066e-01,
        -1.0000e+09, -9.5430e-02, -3.8573e-01, -1.0000e+09, -5.8018e-02,
        -8.9630e-02, -1.0000e+09, -1.4610e-01,  1.8556e-01, -1.0000e+09,
        -1.2252e-01, -1.5364e-01, -2.2685e-01, -1.2514e-01, -3.5331e-01,
        -1.0000e+09, -9.3270e-02, -2.5827e-01, -1.0000e+09, -4.3532e-02,
        -1.7413e-01, -1.0000e+09, -2.1756e-02, -1.9518e-01, -1.0000e+09,
        -1.2093e-01,  4.3223e-02, -1.0000e+09, -1.1936e-01, -2.3187e-01,
        -1.0000e+09, -4.0138e-02, -3.7658e-02, -1.0000e+09, -3.7658e-02,
        -3.7658e-02, -1.0000e+09, -3.7658e-02, -3.7658e-02, -1.0000e+09,
        -3.7658e-02, -3.7658e-02, -1.0000e+09, -3.7658e-02, -3.7658e-02,
        -1.0000e+09, -3.7658e-02

Tokens embeddings:
tensor([-1.0346e-01, -5.9233e-02, -1.0000e+09, -2.1756e-02, -1.2673e-01,
        -1.0000e+09, -1.1936e-01, -2.1925e-01, -1.0000e+09, -1.4316e-01,
        -2.9110e-01, -1.0000e+09, -1.4075e-01,  1.2046e-01, -1.0000e+09,
        -1.4277e-01, -2.3636e-01, -1.0000e+09, -1.0019e-01, -3.7066e-01,
        -1.0000e+09, -9.5430e-02, -3.8573e-01, -1.0000e+09, -5.8018e-02,
        -8.9630e-02, -1.0000e+09, -1.4610e-01,  1.8556e-01, -1.0000e+09,
        -1.2252e-01, -1.5364e-01, -2.2685e-01, -1.2514e-01, -3.5331e-01,
        -1.0000e+09, -9.3270e-02, -2.5827e-01, -1.0000e+09, -4.3532e-02,
        -1.7413e-01, -1.0000e+09, -2.1756e-02, -1.9518e-01, -1.0000e+09,
        -1.2093e-01,  4.3223e-02, -1.0000e+09, -1.1936e-01, -2.3187e-01,
        -1.0000e+09, -4.0138e-02, -3.7658e-02, -1.0000e+09, -3.7658e-02,
        -3.7658e-02, -1.0000e+09, -3.7658e-02, -3.7658e-02, -1.0000e+09,
        -3.7658e-02, -3.7658e-02, -1.0000e+09, -3.7658e-02, -3.7658e-02,
        -1.0000e+09, -3.7658e-02

Tokens embeddings:
tensor([-1.0346e-01, -5.9233e-02, -1.0000e+09, -2.1756e-02, -1.2673e-01,
        -1.0000e+09, -1.1936e-01, -2.1925e-01, -1.0000e+09, -1.4316e-01,
        -2.9110e-01, -1.0000e+09, -1.4075e-01,  1.2046e-01, -1.0000e+09,
        -1.4277e-01, -2.3636e-01, -1.0000e+09, -1.0019e-01, -3.7066e-01,
        -1.0000e+09, -9.5430e-02, -3.8573e-01, -1.0000e+09, -5.8018e-02,
        -8.9630e-02, -1.0000e+09, -1.4610e-01,  1.8556e-01, -1.0000e+09,
        -1.2252e-01, -1.5364e-01, -2.2685e-01, -1.2514e-01, -3.5331e-01,
        -1.0000e+09, -9.3270e-02, -2.5827e-01, -1.0000e+09, -4.3532e-02,
        -1.7413e-01, -1.0000e+09, -2.1756e-02, -1.9518e-01, -1.0000e+09,
        -1.2093e-01,  4.3223e-02, -1.0000e+09, -1.1936e-01, -2.3187e-01,
        -1.0000e+09, -4.0138e-02, -3.7658e-02, -1.0000e+09, -3.7658e-02,
        -3.7658e-02, -1.0000e+09, -3.7658e-02, -3.7658e-02, -1.0000e+09,
        -3.7658e-02, -3.7658e-02, -1.0000e+09, -3.7658e-02, -3.7658e-02,
        -1.0000e+09, -3.7658e-02

Tokens embeddings:
tensor([-1.0346e-01, -5.9233e-02, -1.0000e+09, -2.1756e-02, -1.2673e-01,
        -1.0000e+09, -1.1936e-01, -2.1925e-01, -1.0000e+09, -1.4316e-01,
        -2.9110e-01, -1.0000e+09, -1.4075e-01,  1.2046e-01, -1.0000e+09,
        -1.4277e-01, -2.3636e-01, -1.0000e+09, -1.0019e-01, -3.7066e-01,
        -1.0000e+09, -9.5430e-02, -3.8573e-01, -1.0000e+09, -5.8018e-02,
        -8.9630e-02, -1.0000e+09, -1.4610e-01,  1.8556e-01, -1.0000e+09,
        -1.2252e-01, -1.5364e-01, -2.2685e-01, -1.2514e-01, -3.5331e-01,
        -1.0000e+09, -9.3270e-02, -2.5827e-01, -1.0000e+09, -4.3532e-02,
        -1.7413e-01, -1.0000e+09, -2.1756e-02, -1.9518e-01, -1.0000e+09,
        -1.2093e-01,  4.3223e-02, -1.0000e+09, -1.1936e-01, -2.3187e-01,
        -1.0000e+09, -4.0138e-02, -3.7658e-02, -1.0000e+09, -3.7658e-02,
        -3.7658e-02, -1.0000e+09, -3.7658e-02, -3.7658e-02, -1.0000e+09,
        -3.7658e-02, -3.7658e-02, -1.0000e+09, -3.7658e-02, -3.7658e-02,
        -1.0000e+09, -3.7658e-02

Tokens embeddings:
tensor([-1.0346e-01, -5.9233e-02, -1.0000e+09, -2.1756e-02, -1.2673e-01,
        -1.0000e+09, -1.1936e-01, -2.1925e-01, -1.0000e+09, -1.4316e-01,
        -2.9110e-01, -1.0000e+09, -1.4075e-01,  1.2046e-01, -1.0000e+09,
        -1.4277e-01, -2.3636e-01, -1.0000e+09, -1.0019e-01, -3.7066e-01,
        -1.0000e+09, -9.5430e-02, -3.8573e-01, -1.0000e+09, -5.8018e-02,
        -8.9630e-02, -1.0000e+09, -1.4610e-01,  1.8556e-01, -1.0000e+09,
        -1.2252e-01, -1.5364e-01, -2.2685e-01, -1.2514e-01, -3.5331e-01,
        -1.0000e+09, -9.3270e-02, -2.5827e-01, -1.0000e+09, -4.3532e-02,
        -1.7413e-01, -1.0000e+09, -2.1756e-02, -1.9518e-01, -1.0000e+09,
        -1.2093e-01,  4.3223e-02, -1.0000e+09, -1.1936e-01, -2.3187e-01,
        -1.0000e+09, -4.0138e-02, -3.7658e-02, -1.0000e+09, -3.7658e-02,
        -3.7658e-02, -1.0000e+09, -3.7658e-02, -3.7658e-02, -1.0000e+09,
        -3.7658e-02, -3.7658e-02, -1.0000e+09, -3.7658e-02, -3.7658e-02,
        -1.0000e+09, -3.7658e-02

Tokens embeddings:
tensor([-1.0346e-01, -5.9233e-02, -1.0000e+09, -2.1756e-02, -1.2673e-01,
        -1.0000e+09, -1.1936e-01, -2.1925e-01, -1.0000e+09, -1.4316e-01,
        -2.9110e-01, -1.0000e+09, -1.4075e-01,  1.2046e-01, -1.0000e+09,
        -1.4277e-01, -2.3636e-01, -1.0000e+09, -1.0019e-01, -3.7066e-01,
        -1.0000e+09, -9.5430e-02, -3.8573e-01, -1.0000e+09, -5.8018e-02,
        -8.9630e-02, -1.0000e+09, -1.4610e-01,  1.8556e-01, -1.0000e+09,
        -1.2252e-01, -1.5364e-01, -2.2685e-01, -1.2514e-01, -3.5331e-01,
        -1.0000e+09, -9.3270e-02, -2.5827e-01, -1.0000e+09, -4.3532e-02,
        -1.7413e-01, -1.0000e+09, -2.1756e-02, -1.9518e-01, -1.0000e+09,
        -1.2093e-01,  4.3223e-02, -1.0000e+09, -1.1936e-01, -2.3187e-01,
        -1.0000e+09, -4.0138e-02, -3.7658e-02, -1.0000e+09, -3.7658e-02,
        -3.7658e-02, -1.0000e+09, -3.7658e-02, -3.7658e-02, -1.0000e+09,
        -3.7658e-02, -3.7658e-02, -1.0000e+09, -3.7658e-02, -3.7658e-02,
        -1.0000e+09, -3.7658e-02

Tokens embeddings:
tensor([-1.0346e-01, -5.9233e-02, -1.0000e+09, -2.1756e-02, -1.2673e-01,
        -1.0000e+09, -1.1936e-01, -2.1925e-01, -1.0000e+09, -1.4316e-01,
        -2.9110e-01, -1.0000e+09, -1.4075e-01,  1.2046e-01, -1.0000e+09,
        -1.4277e-01, -2.3636e-01, -1.0000e+09, -1.0019e-01, -3.7066e-01,
        -1.0000e+09, -9.5430e-02, -3.8573e-01, -1.0000e+09, -5.8018e-02,
        -8.9630e-02, -1.0000e+09, -1.4610e-01,  1.8556e-01, -1.0000e+09,
        -1.2252e-01, -1.5364e-01, -2.2685e-01, -1.2514e-01, -3.5331e-01,
        -1.0000e+09, -9.3270e-02, -2.5827e-01, -1.0000e+09, -4.3532e-02,
        -1.7413e-01, -1.0000e+09, -2.1756e-02, -1.9518e-01, -1.0000e+09,
        -1.2093e-01,  4.3223e-02, -1.0000e+09, -1.1936e-01, -2.3187e-01,
        -1.0000e+09, -4.0138e-02, -3.7658e-02, -1.0000e+09, -3.7658e-02,
        -3.7658e-02, -1.0000e+09, -3.7658e-02, -3.7658e-02, -1.0000e+09,
        -3.7658e-02, -3.7658e-02, -1.0000e+09, -3.7658e-02, -3.7658e-02,
        -1.0000e+09, -3.7658e-02

Tokens embeddings:
tensor([-1.0346e-01, -4.3475e-01, -1.0000e+09, -2.1756e-02, -4.3158e-01,
        -1.0000e+09, -9.0812e-02, -2.4386e-01, -1.0000e+09, -2.1756e-02,
        -2.7757e-01, -1.0000e+09, -8.8176e-02, -4.1832e-01, -1.0000e+09,
        -4.0188e-03, -6.5544e-03, -1.1269e-01, -1.0650e-01, -1.4793e-01,
        -3.8338e-01, -1.4492e-01, -2.8796e-01, -1.0000e+09, -3.9778e-02,
        -3.8573e-01, -1.0000e+09, -9.5430e-02, -4.1832e-01, -1.0000e+09,
         1.8556e-01, -4.0138e-02, -1.0000e+09, -3.7658e-02, -3.7658e-02,
        -1.0000e+09, -3.7658e-02, -3.7658e-02, -1.0000e+09, -3.7658e-02,
        -3.7658e-02, -1.0000e+09, -3.7658e-02, -3.7658e-02, -1.0000e+09,
        -3.7658e-02, -3.7658e-02, -1.0000e+09, -3.7658e-02, -3.7658e-02,
        -1.0000e+09, -3.7658e-02, -3.7658e-02, -1.0000e+09, -3.7658e-02,
        -3.7658e-02, -1.0000e+09, -3.7658e-02, -3.7658e-02, -1.0000e+09,
        -3.7658e-02, -3.7658e-02, -1.0000e+09, -3.7658e-02, -3.7658e-02,
        -1.0000e+09, -3.7658e-02

Tokens embeddings:
tensor([-1.0346e-01, -4.3475e-01, -1.0000e+09, -2.1756e-02, -4.3158e-01,
        -1.0000e+09, -9.0812e-02, -2.4386e-01, -1.0000e+09, -2.1756e-02,
        -2.7757e-01, -1.0000e+09, -8.8176e-02, -4.1832e-01, -1.0000e+09,
        -4.0188e-03, -6.5544e-03, -1.1269e-01, -1.0650e-01, -1.4793e-01,
        -3.8338e-01, -1.4492e-01, -2.8796e-01, -1.0000e+09, -3.9778e-02,
        -3.8573e-01, -1.0000e+09, -9.5430e-02, -4.1832e-01, -1.0000e+09,
         1.8556e-01, -4.0138e-02, -1.0000e+09, -3.7658e-02, -3.7658e-02,
        -1.0000e+09, -3.7658e-02, -3.7658e-02, -1.0000e+09, -3.7658e-02,
        -3.7658e-02, -1.0000e+09, -3.7658e-02, -3.7658e-02, -1.0000e+09,
        -3.7658e-02, -3.7658e-02, -1.0000e+09, -3.7658e-02, -3.7658e-02,
        -1.0000e+09, -3.7658e-02, -3.7658e-02, -1.0000e+09, -3.7658e-02,
        -3.7658e-02, -1.0000e+09, -3.7658e-02, -3.7658e-02, -1.0000e+09,
        -3.7658e-02, -3.7658e-02, -1.0000e+09, -3.7658e-02, -3.7658e-02,
        -1.0000e+09, -3.7658e-02

Tokens embeddings:
tensor([-1.0346e-01, -4.3475e-01, -1.0000e+09, -2.1756e-02, -4.3158e-01,
        -1.0000e+09, -9.0812e-02, -2.4386e-01, -1.0000e+09, -2.1756e-02,
        -2.7757e-01, -1.0000e+09, -8.8176e-02, -4.1832e-01, -1.0000e+09,
        -4.0188e-03, -6.5544e-03, -1.1269e-01, -1.0650e-01, -1.4793e-01,
        -3.8338e-01, -1.4492e-01, -2.8796e-01, -1.0000e+09, -3.9778e-02,
        -3.8573e-01, -1.0000e+09, -9.5430e-02, -4.1832e-01, -1.0000e+09,
         1.8556e-01, -4.0138e-02, -1.0000e+09, -3.7658e-02, -3.7658e-02,
        -1.0000e+09, -3.7658e-02, -3.7658e-02, -1.0000e+09, -3.7658e-02,
        -3.7658e-02, -1.0000e+09, -3.7658e-02, -3.7658e-02, -1.0000e+09,
        -3.7658e-02, -3.7658e-02, -1.0000e+09, -3.7658e-02, -3.7658e-02,
        -1.0000e+09, -3.7658e-02, -3.7658e-02, -1.0000e+09, -3.7658e-02,
        -3.7658e-02, -1.0000e+09, -3.7658e-02, -3.7658e-02, -1.0000e+09,
        -3.7658e-02, -3.7658e-02, -1.0000e+09, -3.7658e-02, -3.7658e-02,
        -1.0000e+09, -3.7658e-02

Tokens embeddings:
tensor([-1.0346e-01, -4.3475e-01, -1.0000e+09, -2.1756e-02, -4.3158e-01,
        -1.0000e+09, -9.0812e-02, -2.4386e-01, -1.0000e+09, -2.1756e-02,
        -2.7757e-01, -1.0000e+09, -8.8176e-02, -4.1832e-01, -1.0000e+09,
        -4.0188e-03, -6.5544e-03, -1.1269e-01, -1.0650e-01, -1.4793e-01,
        -3.8338e-01, -1.4492e-01, -2.8796e-01, -1.0000e+09, -3.9778e-02,
        -3.8573e-01, -1.0000e+09, -9.5430e-02, -4.1832e-01, -1.0000e+09,
         1.8556e-01, -4.0138e-02, -1.0000e+09, -3.7658e-02, -3.7658e-02,
        -1.0000e+09, -3.7658e-02, -3.7658e-02, -1.0000e+09, -3.7658e-02,
        -3.7658e-02, -1.0000e+09, -3.7658e-02, -3.7658e-02, -1.0000e+09,
        -3.7658e-02, -3.7658e-02, -1.0000e+09, -3.7658e-02, -3.7658e-02,
        -1.0000e+09, -3.7658e-02, -3.7658e-02, -1.0000e+09, -3.7658e-02,
        -3.7658e-02, -1.0000e+09, -3.7658e-02, -3.7658e-02, -1.0000e+09,
        -3.7658e-02, -3.7658e-02, -1.0000e+09, -3.7658e-02, -3.7658e-02,
        -1.0000e+09, -3.7658e-02

Tokens embeddings:
tensor([-1.0346e-01, -4.3475e-01, -1.0000e+09, -2.1756e-02, -4.3158e-01,
        -1.0000e+09, -9.0812e-02, -2.4386e-01, -1.0000e+09, -2.1756e-02,
        -2.7757e-01, -1.0000e+09, -8.8176e-02, -4.1832e-01, -1.0000e+09,
        -4.0188e-03, -6.5544e-03, -1.1269e-01, -1.0650e-01, -1.4793e-01,
        -3.8338e-01, -1.4492e-01, -2.8796e-01, -1.0000e+09, -3.9778e-02,
        -3.8573e-01, -1.0000e+09, -9.5430e-02, -4.1832e-01, -1.0000e+09,
         1.8556e-01, -4.0138e-02, -1.0000e+09, -3.7658e-02, -3.7658e-02,
        -1.0000e+09, -3.7658e-02, -3.7658e-02, -1.0000e+09, -3.7658e-02,
        -3.7658e-02, -1.0000e+09, -3.7658e-02, -3.7658e-02, -1.0000e+09,
        -3.7658e-02, -3.7658e-02, -1.0000e+09, -3.7658e-02, -3.7658e-02,
        -1.0000e+09, -3.7658e-02, -3.7658e-02, -1.0000e+09, -3.7658e-02,
        -3.7658e-02, -1.0000e+09, -3.7658e-02, -3.7658e-02, -1.0000e+09,
        -3.7658e-02, -3.7658e-02, -1.0000e+09, -3.7658e-02, -3.7658e-02,
        -1.0000e+09, -3.7658e-02

Tokens embeddings:
tensor([-1.0346e-01, -4.3475e-01, -1.0000e+09, -2.1756e-02, -4.3158e-01,
        -1.0000e+09, -9.0812e-02, -2.4386e-01, -1.0000e+09, -2.1756e-02,
        -2.7757e-01, -1.0000e+09, -8.8176e-02, -4.1832e-01, -1.0000e+09,
        -4.0188e-03, -6.5544e-03, -1.1269e-01, -1.0650e-01, -1.4793e-01,
        -3.8338e-01, -1.4492e-01, -2.8796e-01, -1.0000e+09, -3.9778e-02,
        -3.8573e-01, -1.0000e+09, -9.5430e-02, -4.1832e-01, -1.0000e+09,
         1.8556e-01, -4.0138e-02, -1.0000e+09, -3.7658e-02, -3.7658e-02,
        -1.0000e+09, -3.7658e-02, -3.7658e-02, -1.0000e+09, -3.7658e-02,
        -3.7658e-02, -1.0000e+09, -3.7658e-02, -3.7658e-02, -1.0000e+09,
        -3.7658e-02, -3.7658e-02, -1.0000e+09, -3.7658e-02, -3.7658e-02,
        -1.0000e+09, -3.7658e-02, -3.7658e-02, -1.0000e+09, -3.7658e-02,
        -3.7658e-02, -1.0000e+09, -3.7658e-02, -3.7658e-02, -1.0000e+09,
        -3.7658e-02, -3.7658e-02, -1.0000e+09, -3.7658e-02, -3.7658e-02,
        -1.0000e+09, -3.7658e-02

Tokens embeddings:
tensor([-1.0346e-01, -4.3475e-01, -1.0000e+09, -2.1756e-02, -4.3158e-01,
        -1.0000e+09, -9.0812e-02, -2.4386e-01, -1.0000e+09, -2.1756e-02,
        -2.7757e-01, -1.0000e+09, -8.8176e-02, -4.1832e-01, -1.0000e+09,
        -4.0188e-03, -6.5544e-03, -1.1269e-01, -1.0650e-01, -1.4793e-01,
        -3.8338e-01, -1.4492e-01, -2.8796e-01, -1.0000e+09, -3.9778e-02,
        -3.8573e-01, -1.0000e+09, -9.5430e-02, -4.1832e-01, -1.0000e+09,
         1.8556e-01, -4.0138e-02, -1.0000e+09, -3.7658e-02, -3.7658e-02,
        -1.0000e+09, -3.7658e-02, -3.7658e-02, -1.0000e+09, -3.7658e-02,
        -3.7658e-02, -1.0000e+09, -3.7658e-02, -3.7658e-02, -1.0000e+09,
        -3.7658e-02, -3.7658e-02, -1.0000e+09, -3.7658e-02, -3.7658e-02,
        -1.0000e+09, -3.7658e-02, -3.7658e-02, -1.0000e+09, -3.7658e-02,
        -3.7658e-02, -1.0000e+09, -3.7658e-02, -3.7658e-02, -1.0000e+09,
        -3.7658e-02, -3.7658e-02, -1.0000e+09, -3.7658e-02, -3.7658e-02,
        -1.0000e+09, -3.7658e-02

Tokens embeddings:
tensor([-1.0346e-01, -4.3475e-01, -1.0000e+09, -2.1756e-02, -4.3158e-01,
        -1.0000e+09, -9.0812e-02, -2.4386e-01, -1.0000e+09, -2.1756e-02,
        -2.7757e-01, -1.0000e+09, -8.8176e-02, -4.1832e-01, -1.0000e+09,
        -4.0188e-03, -6.5544e-03, -1.1269e-01, -1.0650e-01, -1.4793e-01,
        -3.8338e-01, -1.4492e-01, -2.8796e-01, -1.0000e+09, -3.9778e-02,
        -3.8573e-01, -1.0000e+09, -9.5430e-02, -4.1832e-01, -1.0000e+09,
         1.8556e-01, -4.0138e-02, -1.0000e+09, -3.7658e-02, -3.7658e-02,
        -1.0000e+09, -3.7658e-02, -3.7658e-02, -1.0000e+09, -3.7658e-02,
        -3.7658e-02, -1.0000e+09, -3.7658e-02, -3.7658e-02, -1.0000e+09,
        -3.7658e-02, -3.7658e-02, -1.0000e+09, -3.7658e-02, -3.7658e-02,
        -1.0000e+09, -3.7658e-02, -3.7658e-02, -1.0000e+09, -3.7658e-02,
        -3.7658e-02, -1.0000e+09, -3.7658e-02, -3.7658e-02, -1.0000e+09,
        -3.7658e-02, -3.7658e-02, -1.0000e+09, -3.7658e-02, -3.7658e-02,
        -1.0000e+09, -3.7658e-02

Tokens embeddings:
tensor([-1.0346e-01, -4.3475e-01, -1.0000e+09, -2.1756e-02, -4.3158e-01,
        -1.0000e+09, -9.0812e-02, -2.4386e-01, -1.0000e+09, -2.1756e-02,
        -2.7757e-01, -1.0000e+09, -8.8176e-02, -4.1832e-01, -1.0000e+09,
        -4.0188e-03, -6.5544e-03, -1.1269e-01, -1.0650e-01, -1.4793e-01,
        -3.8338e-01, -1.4492e-01, -2.8796e-01, -1.0000e+09, -3.9778e-02,
        -3.8573e-01, -1.0000e+09, -9.5430e-02, -4.1832e-01, -1.0000e+09,
         1.8556e-01, -4.0138e-02, -1.0000e+09, -3.7658e-02, -3.7658e-02,
        -1.0000e+09, -3.7658e-02, -3.7658e-02, -1.0000e+09, -3.7658e-02,
        -3.7658e-02, -1.0000e+09, -3.7658e-02, -3.7658e-02, -1.0000e+09,
        -3.7658e-02, -3.7658e-02, -1.0000e+09, -3.7658e-02, -3.7658e-02,
        -1.0000e+09, -3.7658e-02, -3.7658e-02, -1.0000e+09, -3.7658e-02,
        -3.7658e-02, -1.0000e+09, -3.7658e-02, -3.7658e-02, -1.0000e+09,
        -3.7658e-02, -3.7658e-02, -1.0000e+09, -3.7658e-02, -3.7658e-02,
        -1.0000e+09, -3.7658e-02

Tokens embeddings:
tensor([-1.0346e-01, -4.3475e-01, -1.0000e+09, -2.1756e-02, -4.3158e-01,
        -1.0000e+09, -9.0812e-02, -2.4386e-01, -1.0000e+09, -2.1756e-02,
        -2.7757e-01, -1.0000e+09, -8.8176e-02, -4.1832e-01, -1.0000e+09,
        -4.0188e-03, -6.5544e-03, -1.1269e-01, -1.0650e-01, -1.4793e-01,
        -3.8338e-01, -1.4492e-01, -2.8796e-01, -1.0000e+09, -3.9778e-02,
        -3.8573e-01, -1.0000e+09, -9.5430e-02, -4.1832e-01, -1.0000e+09,
         1.8556e-01, -4.0138e-02, -1.0000e+09, -3.7658e-02, -3.7658e-02,
        -1.0000e+09, -3.7658e-02, -3.7658e-02, -1.0000e+09, -3.7658e-02,
        -3.7658e-02, -1.0000e+09, -3.7658e-02, -3.7658e-02, -1.0000e+09,
        -3.7658e-02, -3.7658e-02, -1.0000e+09, -3.7658e-02, -3.7658e-02,
        -1.0000e+09, -3.7658e-02, -3.7658e-02, -1.0000e+09, -3.7658e-02,
        -3.7658e-02, -1.0000e+09, -3.7658e-02, -3.7658e-02, -1.0000e+09,
        -3.7658e-02, -3.7658e-02, -1.0000e+09, -3.7658e-02, -3.7658e-02,
        -1.0000e+09, -3.7658e-02

Tokens embeddings:
tensor([-1.0346e-01, -4.3475e-01, -1.0000e+09, -2.1756e-02, -4.3158e-01,
        -1.0000e+09, -9.0812e-02, -2.4386e-01, -1.0000e+09, -2.1756e-02,
        -2.7757e-01, -1.0000e+09, -8.8176e-02, -4.1832e-01, -1.0000e+09,
        -4.0188e-03, -6.5544e-03, -1.1269e-01, -1.0650e-01, -1.4793e-01,
        -3.8338e-01, -1.4492e-01, -2.8796e-01, -1.0000e+09, -3.9778e-02,
        -3.8573e-01, -1.0000e+09, -9.5430e-02, -4.1832e-01, -1.0000e+09,
         1.8556e-01, -4.0138e-02, -1.0000e+09, -3.7658e-02, -3.7658e-02,
        -1.0000e+09, -3.7658e-02, -3.7658e-02, -1.0000e+09, -3.7658e-02,
        -3.7658e-02, -1.0000e+09, -3.7658e-02, -3.7658e-02, -1.0000e+09,
        -3.7658e-02, -3.7658e-02, -1.0000e+09, -3.7658e-02, -3.7658e-02,
        -1.0000e+09, -3.7658e-02, -3.7658e-02, -1.0000e+09, -3.7658e-02,
        -3.7658e-02, -1.0000e+09, -3.7658e-02, -3.7658e-02, -1.0000e+09,
        -3.7658e-02, -3.7658e-02, -1.0000e+09, -3.7658e-02, -3.7658e-02,
        -1.0000e+09, -3.7658e-02

Tokens embeddings:
tensor([-1.0346e-01, -4.3475e-01, -1.0000e+09, -2.1756e-02, -4.3158e-01,
        -1.0000e+09, -9.0812e-02, -2.4386e-01, -1.0000e+09, -2.1756e-02,
        -2.7757e-01, -1.0000e+09, -8.8176e-02, -4.1832e-01, -1.0000e+09,
        -4.0188e-03, -6.5544e-03, -1.1269e-01, -1.0650e-01, -1.4793e-01,
        -3.8338e-01, -1.4492e-01, -2.8796e-01, -1.0000e+09, -3.9778e-02,
        -3.8573e-01, -1.0000e+09, -9.5430e-02, -4.1832e-01, -1.0000e+09,
         1.8556e-01, -4.0138e-02, -1.0000e+09, -3.7658e-02, -3.7658e-02,
        -1.0000e+09, -3.7658e-02, -3.7658e-02, -1.0000e+09, -3.7658e-02,
        -3.7658e-02, -1.0000e+09, -3.7658e-02, -3.7658e-02, -1.0000e+09,
        -3.7658e-02, -3.7658e-02, -1.0000e+09, -3.7658e-02, -3.7658e-02,
        -1.0000e+09, -3.7658e-02, -3.7658e-02, -1.0000e+09, -3.7658e-02,
        -3.7658e-02, -1.0000e+09, -3.7658e-02, -3.7658e-02, -1.0000e+09,
        -3.7658e-02, -3.7658e-02, -1.0000e+09, -3.7658e-02, -3.7658e-02,
        -1.0000e+09, -3.7658e-02

Tokens embeddings:
tensor([-1.0346e-01, -3.5172e-01, -1.0000e+09,  1.8556e-01, -2.9615e-01,
        -1.0000e+09, -4.0188e-03, -3.8573e-01, -1.0000e+09, -9.9040e-02,
        -3.8338e-01, -1.0000e+09, -1.3656e-01, -3.3351e-01, -1.0000e+09,
        -7.9769e-02, -5.9233e-02, -1.0000e+09, -1.4389e-01, -4.0188e-03,
        -1.0000e+09, -1.5021e-01,  1.8556e-01, -1.0000e+09, -8.3306e-02,
        -5.7668e-02, -1.0000e+09, -8.7435e-02, -4.2132e-01, -1.0000e+09,
        -1.2094e-01, -4.4013e-01, -1.0000e+09, -9.4922e-02, -2.2356e-01,
        -1.0000e+09, -7.9537e-02, -3.2822e-01, -1.0000e+09, -1.7506e-01,
        -3.8573e-01, -1.0000e+09, -1.3126e-01, -4.3257e-01, -1.0000e+09,
        -1.2094e-01, -3.9916e-01, -1.0000e+09, -9.5430e-02, -8.0162e-02,
        -1.0000e+09, -4.0138e-02, -3.7658e-02, -1.0000e+09, -3.7658e-02,
        -3.7658e-02, -1.0000e+09, -3.7658e-02, -3.7658e-02, -1.0000e+09,
        -3.7658e-02, -3.7658e-02, -1.0000e+09, -3.7658e-02, -3.7658e-02,
        -1.0000e+09, -3.7658e-02

Tokens embeddings:
tensor([-1.0346e-01, -3.5172e-01, -1.0000e+09,  1.8556e-01, -2.9615e-01,
        -1.0000e+09, -4.0188e-03, -3.8573e-01, -1.0000e+09, -9.9040e-02,
        -3.8338e-01, -1.0000e+09, -1.3656e-01, -3.3351e-01, -1.0000e+09,
        -7.9769e-02, -5.9233e-02, -1.0000e+09, -1.4389e-01, -4.0188e-03,
        -1.0000e+09, -1.5021e-01,  1.8556e-01, -1.0000e+09, -8.3306e-02,
        -5.7668e-02, -1.0000e+09, -8.7435e-02, -4.2132e-01, -1.0000e+09,
        -1.2094e-01, -4.4013e-01, -1.0000e+09, -9.4922e-02, -2.2356e-01,
        -1.0000e+09, -7.9537e-02, -3.2822e-01, -1.0000e+09, -1.7506e-01,
        -3.8573e-01, -1.0000e+09, -1.3126e-01, -4.3257e-01, -1.0000e+09,
        -1.2094e-01, -3.9916e-01, -1.0000e+09, -9.5430e-02, -8.0162e-02,
        -1.0000e+09, -4.0138e-02, -3.7658e-02, -1.0000e+09, -3.7658e-02,
        -3.7658e-02, -1.0000e+09, -3.7658e-02, -3.7658e-02, -1.0000e+09,
        -3.7658e-02, -3.7658e-02, -1.0000e+09, -3.7658e-02, -3.7658e-02,
        -1.0000e+09, -3.7658e-02

Tokens embeddings:
tensor([-1.0346e-01, -3.5172e-01, -1.0000e+09,  1.8556e-01, -2.9615e-01,
        -1.0000e+09, -4.0188e-03, -3.8573e-01, -1.0000e+09, -9.9040e-02,
        -3.8338e-01, -1.0000e+09, -1.3656e-01, -3.3351e-01, -1.0000e+09,
        -7.9769e-02, -5.9233e-02, -1.0000e+09, -1.4389e-01, -4.0188e-03,
        -1.0000e+09, -1.5021e-01,  1.8556e-01, -1.0000e+09, -8.3306e-02,
        -5.7668e-02, -1.0000e+09, -8.7435e-02, -4.2132e-01, -1.0000e+09,
        -1.2094e-01, -4.4013e-01, -1.0000e+09, -9.4922e-02, -2.2356e-01,
        -1.0000e+09, -7.9537e-02, -3.2822e-01, -1.0000e+09, -1.7506e-01,
        -3.8573e-01, -1.0000e+09, -1.3126e-01, -4.3257e-01, -1.0000e+09,
        -1.2094e-01, -3.9916e-01, -1.0000e+09, -9.5430e-02, -8.0162e-02,
        -1.0000e+09, -4.0138e-02, -3.7658e-02, -1.0000e+09, -3.7658e-02,
        -3.7658e-02, -1.0000e+09, -3.7658e-02, -3.7658e-02, -1.0000e+09,
        -3.7658e-02, -3.7658e-02, -1.0000e+09, -3.7658e-02, -3.7658e-02,
        -1.0000e+09, -3.7658e-02

Tokens embeddings:
tensor([-1.0346e-01, -3.5172e-01, -1.0000e+09,  1.8556e-01, -2.9615e-01,
        -1.0000e+09, -4.0188e-03, -3.8573e-01, -1.0000e+09, -9.9040e-02,
        -3.8338e-01, -1.0000e+09, -1.3656e-01, -3.3351e-01, -1.0000e+09,
        -7.9769e-02, -5.9233e-02, -1.0000e+09, -1.4389e-01, -4.0188e-03,
        -1.0000e+09, -1.5021e-01,  1.8556e-01, -1.0000e+09, -8.3306e-02,
        -5.7668e-02, -1.0000e+09, -8.7435e-02, -4.2132e-01, -1.0000e+09,
        -1.2094e-01, -4.4013e-01, -1.0000e+09, -9.4922e-02, -2.2356e-01,
        -1.0000e+09, -7.9537e-02, -3.2822e-01, -1.0000e+09, -1.7506e-01,
        -3.8573e-01, -1.0000e+09, -1.3126e-01, -4.3257e-01, -1.0000e+09,
        -1.2094e-01, -3.9916e-01, -1.0000e+09, -9.5430e-02, -8.0162e-02,
        -1.0000e+09, -4.0138e-02, -3.7658e-02, -1.0000e+09, -3.7658e-02,
        -3.7658e-02, -1.0000e+09, -3.7658e-02, -3.7658e-02, -1.0000e+09,
        -3.7658e-02, -3.7658e-02, -1.0000e+09, -3.7658e-02, -3.7658e-02,
        -1.0000e+09, -3.7658e-02

Tokens embeddings:
tensor([-1.0346e-01, -3.5172e-01, -1.0000e+09,  1.8556e-01, -2.9615e-01,
        -1.0000e+09, -4.0188e-03, -3.8573e-01, -1.0000e+09, -9.9040e-02,
        -3.8338e-01, -1.0000e+09, -1.3656e-01, -3.3351e-01, -1.0000e+09,
        -7.9769e-02, -5.9233e-02, -1.0000e+09, -1.4389e-01, -4.0188e-03,
        -1.0000e+09, -1.5021e-01,  1.8556e-01, -1.0000e+09, -8.3306e-02,
        -5.7668e-02, -1.0000e+09, -8.7435e-02, -4.2132e-01, -1.0000e+09,
        -1.2094e-01, -4.4013e-01, -1.0000e+09, -9.4922e-02, -2.2356e-01,
        -1.0000e+09, -7.9537e-02, -3.2822e-01, -1.0000e+09, -1.7506e-01,
        -3.8573e-01, -1.0000e+09, -1.3126e-01, -4.3257e-01, -1.0000e+09,
        -1.2094e-01, -3.9916e-01, -1.0000e+09, -9.5430e-02, -8.0162e-02,
        -1.0000e+09, -4.0138e-02, -3.7658e-02, -1.0000e+09, -3.7658e-02,
        -3.7658e-02, -1.0000e+09, -3.7658e-02, -3.7658e-02, -1.0000e+09,
        -3.7658e-02, -3.7658e-02, -1.0000e+09, -3.7658e-02, -3.7658e-02,
        -1.0000e+09, -3.7658e-02

Tokens embeddings:
tensor([-1.0346e-01, -3.5172e-01, -1.0000e+09,  1.8556e-01, -2.9615e-01,
        -1.0000e+09, -4.0188e-03, -3.8573e-01, -1.0000e+09, -9.9040e-02,
        -3.8338e-01, -1.0000e+09, -1.3656e-01, -3.3351e-01, -1.0000e+09,
        -7.9769e-02, -5.9233e-02, -1.0000e+09, -1.4389e-01, -4.0188e-03,
        -1.0000e+09, -1.5021e-01,  1.8556e-01, -1.0000e+09, -8.3306e-02,
        -5.7668e-02, -1.0000e+09, -8.7435e-02, -4.2132e-01, -1.0000e+09,
        -1.2094e-01, -4.4013e-01, -1.0000e+09, -9.4922e-02, -2.2356e-01,
        -1.0000e+09, -7.9537e-02, -3.2822e-01, -1.0000e+09, -1.7506e-01,
        -3.8573e-01, -1.0000e+09, -1.3126e-01, -4.3257e-01, -1.0000e+09,
        -1.2094e-01, -3.9916e-01, -1.0000e+09, -9.5430e-02, -8.0162e-02,
        -1.0000e+09, -4.0138e-02, -3.7658e-02, -1.0000e+09, -3.7658e-02,
        -3.7658e-02, -1.0000e+09, -3.7658e-02, -3.7658e-02, -1.0000e+09,
        -3.7658e-02, -3.7658e-02, -1.0000e+09, -3.7658e-02, -3.7658e-02,
        -1.0000e+09, -3.7658e-02

Tokens embeddings:
tensor([-1.0346e-01, -3.5172e-01, -1.0000e+09,  1.8556e-01, -2.9615e-01,
        -1.0000e+09, -4.0188e-03, -3.8573e-01, -1.0000e+09, -9.9040e-02,
        -3.8338e-01, -1.0000e+09, -1.3656e-01, -3.3351e-01, -1.0000e+09,
        -7.9769e-02, -5.9233e-02, -1.0000e+09, -1.4389e-01, -4.0188e-03,
        -1.0000e+09, -1.5021e-01,  1.8556e-01, -1.0000e+09, -8.3306e-02,
        -5.7668e-02, -1.0000e+09, -8.7435e-02, -4.2132e-01, -1.0000e+09,
        -1.2094e-01, -4.4013e-01, -1.0000e+09, -9.4922e-02, -2.2356e-01,
        -1.0000e+09, -7.9537e-02, -3.2822e-01, -1.0000e+09, -1.7506e-01,
        -3.8573e-01, -1.0000e+09, -1.3126e-01, -4.3257e-01, -1.0000e+09,
        -1.2094e-01, -3.9916e-01, -1.0000e+09, -9.5430e-02, -8.0162e-02,
        -1.0000e+09, -4.0138e-02, -3.7658e-02, -1.0000e+09, -3.7658e-02,
        -3.7658e-02, -1.0000e+09, -3.7658e-02, -3.7658e-02, -1.0000e+09,
        -3.7658e-02, -3.7658e-02, -1.0000e+09, -3.7658e-02, -3.7658e-02,
        -1.0000e+09, -3.7658e-02

Tokens embeddings:
tensor([-1.0346e-01, -3.5172e-01, -1.0000e+09,  1.8556e-01, -2.9615e-01,
        -1.0000e+09, -4.0188e-03, -3.8573e-01, -1.0000e+09, -9.9040e-02,
        -3.8338e-01, -1.0000e+09, -1.3656e-01, -3.3351e-01, -1.0000e+09,
        -7.9769e-02, -5.9233e-02, -1.0000e+09, -1.4389e-01, -4.0188e-03,
        -1.0000e+09, -1.5021e-01,  1.8556e-01, -1.0000e+09, -8.3306e-02,
        -5.7668e-02, -1.0000e+09, -8.7435e-02, -4.2132e-01, -1.0000e+09,
        -1.2094e-01, -4.4013e-01, -1.0000e+09, -9.4922e-02, -2.2356e-01,
        -1.0000e+09, -7.9537e-02, -3.2822e-01, -1.0000e+09, -1.7506e-01,
        -3.8573e-01, -1.0000e+09, -1.3126e-01, -4.3257e-01, -1.0000e+09,
        -1.2094e-01, -3.9916e-01, -1.0000e+09, -9.5430e-02, -8.0162e-02,
        -1.0000e+09, -4.0138e-02, -3.7658e-02, -1.0000e+09, -3.7658e-02,
        -3.7658e-02, -1.0000e+09, -3.7658e-02, -3.7658e-02, -1.0000e+09,
        -3.7658e-02, -3.7658e-02, -1.0000e+09, -3.7658e-02, -3.7658e-02,
        -1.0000e+09, -3.7658e-02

Tokens embeddings:
tensor([-1.0346e-01, -3.5172e-01, -1.0000e+09,  1.8556e-01, -2.9615e-01,
        -1.0000e+09, -4.0188e-03, -3.8573e-01, -1.0000e+09, -9.9040e-02,
        -3.8338e-01, -1.0000e+09, -1.3656e-01, -3.3351e-01, -1.0000e+09,
        -7.9769e-02, -5.9233e-02, -1.0000e+09, -1.4389e-01, -4.0188e-03,
        -1.0000e+09, -1.5021e-01,  1.8556e-01, -1.0000e+09, -8.3306e-02,
        -5.7668e-02, -1.0000e+09, -8.7435e-02, -4.2132e-01, -1.0000e+09,
        -1.2094e-01, -4.4013e-01, -1.0000e+09, -9.4922e-02, -2.2356e-01,
        -1.0000e+09, -7.9537e-02, -3.2822e-01, -1.0000e+09, -1.7506e-01,
        -3.8573e-01, -1.0000e+09, -1.3126e-01, -4.3257e-01, -1.0000e+09,
        -1.2094e-01, -3.9916e-01, -1.0000e+09, -9.5430e-02, -8.0162e-02,
        -1.0000e+09, -4.0138e-02, -3.7658e-02, -1.0000e+09, -3.7658e-02,
        -3.7658e-02, -1.0000e+09, -3.7658e-02, -3.7658e-02, -1.0000e+09,
        -3.7658e-02, -3.7658e-02, -1.0000e+09, -3.7658e-02, -3.7658e-02,
        -1.0000e+09, -3.7658e-02

Tokens embeddings:
tensor([-1.0346e-01, -3.5172e-01, -1.0000e+09,  1.8556e-01, -2.9615e-01,
        -1.0000e+09, -4.0188e-03, -3.8573e-01, -1.0000e+09, -9.9040e-02,
        -3.8338e-01, -1.0000e+09, -1.3656e-01, -3.3351e-01, -1.0000e+09,
        -7.9769e-02, -5.9233e-02, -1.0000e+09, -1.4389e-01, -4.0188e-03,
        -1.0000e+09, -1.5021e-01,  1.8556e-01, -1.0000e+09, -8.3306e-02,
        -5.7668e-02, -1.0000e+09, -8.7435e-02, -4.2132e-01, -1.0000e+09,
        -1.2094e-01, -4.4013e-01, -1.0000e+09, -9.4922e-02, -2.2356e-01,
        -1.0000e+09, -7.9537e-02, -3.2822e-01, -1.0000e+09, -1.7506e-01,
        -3.8573e-01, -1.0000e+09, -1.3126e-01, -4.3257e-01, -1.0000e+09,
        -1.2094e-01, -3.9916e-01, -1.0000e+09, -9.5430e-02, -8.0162e-02,
        -1.0000e+09, -4.0138e-02, -3.7658e-02, -1.0000e+09, -3.7658e-02,
        -3.7658e-02, -1.0000e+09, -3.7658e-02, -3.7658e-02, -1.0000e+09,
        -3.7658e-02, -3.7658e-02, -1.0000e+09, -3.7658e-02, -3.7658e-02,
        -1.0000e+09, -3.7658e-02

Tokens embeddings:
tensor([-1.0346e-01, -3.5172e-01, -1.0000e+09,  1.8556e-01, -2.9615e-01,
        -1.0000e+09, -4.0188e-03, -3.8573e-01, -1.0000e+09, -9.9040e-02,
        -3.8338e-01, -1.0000e+09, -1.3656e-01, -3.3351e-01, -1.0000e+09,
        -7.9769e-02, -5.9233e-02, -1.0000e+09, -1.4389e-01, -4.0188e-03,
        -1.0000e+09, -1.5021e-01,  1.8556e-01, -1.0000e+09, -8.3306e-02,
        -5.7668e-02, -1.0000e+09, -8.7435e-02, -4.2132e-01, -1.0000e+09,
        -1.2094e-01, -4.4013e-01, -1.0000e+09, -9.4922e-02, -2.2356e-01,
        -1.0000e+09, -7.9537e-02, -3.2822e-01, -1.0000e+09, -1.7506e-01,
        -3.8573e-01, -1.0000e+09, -1.3126e-01, -4.3257e-01, -1.0000e+09,
        -1.2094e-01, -3.9916e-01, -1.0000e+09, -9.5430e-02, -8.0162e-02,
        -1.0000e+09, -4.0138e-02, -3.7658e-02, -1.0000e+09, -3.7658e-02,
        -3.7658e-02, -1.0000e+09, -3.7658e-02, -3.7658e-02, -1.0000e+09,
        -3.7658e-02, -3.7658e-02, -1.0000e+09, -3.7658e-02, -3.7658e-02,
        -1.0000e+09, -3.7658e-02

Tokens embeddings:
tensor([-1.0346e-01, -3.5172e-01, -1.0000e+09,  1.8556e-01, -2.9615e-01,
        -1.0000e+09, -4.0188e-03, -3.8573e-01, -1.0000e+09, -9.9040e-02,
        -3.8338e-01, -1.0000e+09, -1.3656e-01, -3.3351e-01, -1.0000e+09,
        -7.9769e-02, -5.9233e-02, -1.0000e+09, -1.4389e-01, -4.0188e-03,
        -1.0000e+09, -1.5021e-01,  1.8556e-01, -1.0000e+09, -8.3306e-02,
        -5.7668e-02, -1.0000e+09, -8.7435e-02, -4.2132e-01, -1.0000e+09,
        -1.2094e-01, -4.4013e-01, -1.0000e+09, -9.4922e-02, -2.2356e-01,
        -1.0000e+09, -7.9537e-02, -3.2822e-01, -1.0000e+09, -1.7506e-01,
        -3.8573e-01, -1.0000e+09, -1.3126e-01, -4.3257e-01, -1.0000e+09,
        -1.2094e-01, -3.9916e-01, -1.0000e+09, -9.5430e-02, -8.0162e-02,
        -1.0000e+09, -4.0138e-02, -3.7658e-02, -1.0000e+09, -3.7658e-02,
        -3.7658e-02, -1.0000e+09, -3.7658e-02, -3.7658e-02, -1.0000e+09,
        -3.7658e-02, -3.7658e-02, -1.0000e+09, -3.7658e-02, -3.7658e-02,
        -1.0000e+09, -3.7658e-02

Tokens embeddings:
tensor([-0.1035, -0.0218, -0.1153, -0.1536, -0.1337, -0.1594, -0.1119, -0.1524,
        -0.0595, -0.1395, -0.1973, -0.1271,  0.1856, -0.0401, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377])
arg max of tensor([-0.1035, -0.0218, -0.1153, -0.1536, -0.1337, -0.1594, -0.1119, -0.1524,
        -0.0595, -0.1395, -0.1973, -0.1271,  0.1856, -0.0401, -0.0377, -

Tokens embeddings:
tensor([-0.1035, -0.0218, -0.1153, -0.1536, -0.1337, -0.1594, -0.1119, -0.1524,
        -0.0595, -0.1395, -0.1973, -0.1271,  0.1856, -0.0401, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377])
arg max of tensor([-0.1035, -0.0218, -0.1153, -0.1536, -0.1337, -0.1594, -0.1119, -0.1524,
        -0.0595, -0.1395, -0.1973, -0.1271,  0.1856, -0.0401, -0.0377, -

Tokens embeddings:
tensor([-0.1035, -0.0218, -0.1153, -0.1536, -0.1337, -0.1594, -0.1119, -0.1524,
        -0.0595, -0.1395, -0.1973, -0.1271,  0.1856, -0.0401, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377])
arg max of tensor([-0.1035, -0.0218, -0.1153, -0.1536, -0.1337, -0.1594, -0.1119, -0.1524,
        -0.0595, -0.1395, -0.1973, -0.1271,  0.1856, -0.0401, -0.0377, -

Tokens embeddings:
tensor([-0.1035, -0.0218, -0.1153, -0.1536, -0.1337, -0.1594, -0.1119, -0.1524,
        -0.0595, -0.1395, -0.1973, -0.1271,  0.1856, -0.0401, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377])
arg max of tensor([-0.1035, -0.0218, -0.1153, -0.1536, -0.1337, -0.1594, -0.1119, -0.1524,
        -0.0595, -0.1395, -0.1973, -0.1271,  0.1856, -0.0401, -0.0377, -

Tokens embeddings:
tensor([-0.1035, -0.0218, -0.1153, -0.1536, -0.1337, -0.1594, -0.1119, -0.1524,
        -0.0595, -0.1395, -0.1973, -0.1271,  0.1856, -0.0401, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377])
arg max of tensor([-0.1035, -0.0218, -0.1153, -0.1536, -0.1337, -0.1594, -0.1119, -0.1524,
        -0.0595, -0.1395, -0.1973, -0.1271,  0.1856, -0.0401, -0.0377, -

Tokens embeddings:
tensor([-0.1035, -0.0218, -0.1153, -0.1536, -0.1337, -0.1594, -0.1119, -0.1524,
        -0.0595, -0.1395, -0.1973, -0.1271,  0.1856, -0.0401, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377])
arg max of tensor([-0.1035, -0.0218, -0.1153, -0.1536, -0.1337, -0.1594, -0.1119, -0.1524,
        -0.0595, -0.1395, -0.1973, -0.1271,  0.1856, -0.0401, -0.0377, -

Tokens embeddings:
tensor([-0.1035, -0.0218, -0.1153, -0.1536, -0.1337, -0.1594, -0.1119, -0.1524,
        -0.0595, -0.1395, -0.1973, -0.1271,  0.1856, -0.0401, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377])
arg max of tensor([-0.1035, -0.0218, -0.1153, -0.1536, -0.1337, -0.1594, -0.1119, -0.1524,
        -0.0595, -0.1395, -0.1973, -0.1271,  0.1856, -0.0401, -0.0377, -

Tokens embeddings:
tensor([-0.1035, -0.0218, -0.1153, -0.1536, -0.1337, -0.1594, -0.1119, -0.1524,
        -0.0595, -0.1395, -0.1973, -0.1271,  0.1856, -0.0401, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377])
arg max of tensor([-0.1035, -0.0218, -0.1153, -0.1536, -0.1337, -0.1594, -0.1119, -0.1524,
        -0.0595, -0.1395, -0.1973, -0.1271,  0.1856, -0.0401, -0.0377, -

Tokens embeddings:
tensor([-0.1035, -0.0218, -0.1153, -0.1536, -0.1337, -0.1594, -0.1119, -0.1524,
        -0.0595, -0.1395, -0.1973, -0.1271,  0.1856, -0.0401, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377])
arg max of tensor([-0.1035, -0.0218, -0.1153, -0.1536, -0.1337, -0.1594, -0.1119, -0.1524,
        -0.0595, -0.1395, -0.1973, -0.1271,  0.1856, -0.0401, -0.0377, -

Tokens embeddings:
tensor([-0.1035, -0.0218, -0.1153, -0.1536, -0.1337, -0.1594, -0.1119, -0.1524,
        -0.0595, -0.1395, -0.1973, -0.1271,  0.1856, -0.0401, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377])
arg max of tensor([-0.1035, -0.0218, -0.1153, -0.1536, -0.1337, -0.1594, -0.1119, -0.1524,
        -0.0595, -0.1395, -0.1973, -0.1271,  0.1856, -0.0401, -0.0377, -

Tokens embeddings:
tensor([-0.1035, -0.0218, -0.1153, -0.1536, -0.1337, -0.1594, -0.1119, -0.1524,
        -0.0595, -0.1395, -0.1973, -0.1271,  0.1856, -0.0401, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377])
arg max of tensor([-0.1035, -0.0218, -0.1153, -0.1536, -0.1337, -0.1594, -0.1119, -0.1524,
        -0.0595, -0.1395, -0.1973, -0.1271,  0.1856, -0.0401, -0.0377, -

Tokens embeddings:
tensor([-0.1035, -0.0218, -0.1153, -0.1536, -0.1337, -0.1594, -0.1119, -0.1524,
        -0.0595, -0.1395, -0.1973, -0.1271,  0.1856, -0.0401, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377])
arg max of tensor([-0.1035, -0.0218, -0.1153, -0.1536, -0.1337, -0.1594, -0.1119, -0.1524,
        -0.0595, -0.1395, -0.1973, -0.1271,  0.1856, -0.0401, -0.0377, -

Tokens embeddings:
tensor([-1.0346e-01, -4.1641e-01, -1.0000e+09, -1.3080e-01, -3.8338e-01,
        -1.0000e+09, -2.1756e-02, -4.1957e-01, -1.0000e+09, -3.1766e-02,
         7.9894e-02, -1.0000e+09, -9.3719e-02, -3.7188e-01, -1.0000e+09,
        -5.1321e-02, -1.8946e-01, -1.0000e+09, -4.0138e-02, -3.7658e-02,
        -1.0000e+09, -3.7658e-02, -3.7658e-02, -1.0000e+09, -3.7658e-02,
        -3.7658e-02, -1.0000e+09, -3.7658e-02, -3.7658e-02, -3.7658e-02,
        -3.7658e-02, -3.7658e-02, -3.7658e-02, -3.7658e-02, -3.7658e-02,
        -1.0000e+09, -3.7658e-02, -3.7658e-02, -1.0000e+09, -3.7658e-02,
        -3.7658e-02, -1.0000e+09, -3.7658e-02, -3.7658e-02, -1.0000e+09,
        -3.7658e-02, -3.7658e-02, -1.0000e+09, -3.7658e-02, -3.7658e-02,
        -1.0000e+09, -3.7658e-02, -3.7658e-02, -1.0000e+09, -3.7658e-02,
        -3.7658e-02, -1.0000e+09, -3.7658e-02, -3.7658e-02, -1.0000e+09,
        -3.7658e-02, -3.7658e-02, -1.0000e+09, -3.7658e-02, -3.7658e-02,
        -1.0000e+09, -3.7658e-02

Tokens embeddings:
tensor([-1.0346e-01, -4.1641e-01, -1.0000e+09, -1.3080e-01, -3.8338e-01,
        -1.0000e+09, -2.1756e-02, -4.1957e-01, -1.0000e+09, -3.1766e-02,
         7.9894e-02, -1.0000e+09, -9.3719e-02, -3.7188e-01, -1.0000e+09,
        -5.1321e-02, -1.8946e-01, -1.0000e+09, -4.0138e-02, -3.7658e-02,
        -1.0000e+09, -3.7658e-02, -3.7658e-02, -1.0000e+09, -3.7658e-02,
        -3.7658e-02, -1.0000e+09, -3.7658e-02, -3.7658e-02, -3.7658e-02,
        -3.7658e-02, -3.7658e-02, -3.7658e-02, -3.7658e-02, -3.7658e-02,
        -1.0000e+09, -3.7658e-02, -3.7658e-02, -1.0000e+09, -3.7658e-02,
        -3.7658e-02, -1.0000e+09, -3.7658e-02, -3.7658e-02, -1.0000e+09,
        -3.7658e-02, -3.7658e-02, -1.0000e+09, -3.7658e-02, -3.7658e-02,
        -1.0000e+09, -3.7658e-02, -3.7658e-02, -1.0000e+09, -3.7658e-02,
        -3.7658e-02, -1.0000e+09, -3.7658e-02, -3.7658e-02, -1.0000e+09,
        -3.7658e-02, -3.7658e-02, -1.0000e+09, -3.7658e-02, -3.7658e-02,
        -1.0000e+09, -3.7658e-02

Tokens embeddings:
tensor([-1.0346e-01, -4.1641e-01, -1.0000e+09, -1.3080e-01, -3.8338e-01,
        -1.0000e+09, -2.1756e-02, -4.1957e-01, -1.0000e+09, -3.1766e-02,
         7.9894e-02, -1.0000e+09, -9.3719e-02, -3.7188e-01, -1.0000e+09,
        -5.1321e-02, -1.8946e-01, -1.0000e+09, -4.0138e-02, -3.7658e-02,
        -1.0000e+09, -3.7658e-02, -3.7658e-02, -1.0000e+09, -3.7658e-02,
        -3.7658e-02, -1.0000e+09, -3.7658e-02, -3.7658e-02, -3.7658e-02,
        -3.7658e-02, -3.7658e-02, -3.7658e-02, -3.7658e-02, -3.7658e-02,
        -1.0000e+09, -3.7658e-02, -3.7658e-02, -1.0000e+09, -3.7658e-02,
        -3.7658e-02, -1.0000e+09, -3.7658e-02, -3.7658e-02, -1.0000e+09,
        -3.7658e-02, -3.7658e-02, -1.0000e+09, -3.7658e-02, -3.7658e-02,
        -1.0000e+09, -3.7658e-02, -3.7658e-02, -1.0000e+09, -3.7658e-02,
        -3.7658e-02, -1.0000e+09, -3.7658e-02, -3.7658e-02, -1.0000e+09,
        -3.7658e-02, -3.7658e-02, -1.0000e+09, -3.7658e-02, -3.7658e-02,
        -1.0000e+09, -3.7658e-02

Tokens embeddings:
tensor([-1.0346e-01, -4.1641e-01, -1.0000e+09, -1.3080e-01, -3.8338e-01,
        -1.0000e+09, -2.1756e-02, -4.1957e-01, -1.0000e+09, -3.1766e-02,
         7.9894e-02, -1.0000e+09, -9.3719e-02, -3.7188e-01, -1.0000e+09,
        -5.1321e-02, -1.8946e-01, -1.0000e+09, -4.0138e-02, -3.7658e-02,
        -1.0000e+09, -3.7658e-02, -3.7658e-02, -1.0000e+09, -3.7658e-02,
        -3.7658e-02, -1.0000e+09, -3.7658e-02, -3.7658e-02, -3.7658e-02,
        -3.7658e-02, -3.7658e-02, -3.7658e-02, -3.7658e-02, -3.7658e-02,
        -1.0000e+09, -3.7658e-02, -3.7658e-02, -1.0000e+09, -3.7658e-02,
        -3.7658e-02, -1.0000e+09, -3.7658e-02, -3.7658e-02, -1.0000e+09,
        -3.7658e-02, -3.7658e-02, -1.0000e+09, -3.7658e-02, -3.7658e-02,
        -1.0000e+09, -3.7658e-02, -3.7658e-02, -1.0000e+09, -3.7658e-02,
        -3.7658e-02, -1.0000e+09, -3.7658e-02, -3.7658e-02, -1.0000e+09,
        -3.7658e-02, -3.7658e-02, -1.0000e+09, -3.7658e-02, -3.7658e-02,
        -1.0000e+09, -3.7658e-02

Tokens embeddings:
tensor([-1.0346e-01, -4.1641e-01, -1.0000e+09, -1.3080e-01, -3.8338e-01,
        -1.0000e+09, -2.1756e-02, -4.1957e-01, -1.0000e+09, -3.1766e-02,
         7.9894e-02, -1.0000e+09, -9.3719e-02, -3.7188e-01, -1.0000e+09,
        -5.1321e-02, -1.8946e-01, -1.0000e+09, -4.0138e-02, -3.7658e-02,
        -1.0000e+09, -3.7658e-02, -3.7658e-02, -1.0000e+09, -3.7658e-02,
        -3.7658e-02, -1.0000e+09, -3.7658e-02, -3.7658e-02, -3.7658e-02,
        -3.7658e-02, -3.7658e-02, -3.7658e-02, -3.7658e-02, -3.7658e-02,
        -1.0000e+09, -3.7658e-02, -3.7658e-02, -1.0000e+09, -3.7658e-02,
        -3.7658e-02, -1.0000e+09, -3.7658e-02, -3.7658e-02, -1.0000e+09,
        -3.7658e-02, -3.7658e-02, -1.0000e+09, -3.7658e-02, -3.7658e-02,
        -1.0000e+09, -3.7658e-02, -3.7658e-02, -1.0000e+09, -3.7658e-02,
        -3.7658e-02, -1.0000e+09, -3.7658e-02, -3.7658e-02, -1.0000e+09,
        -3.7658e-02, -3.7658e-02, -1.0000e+09, -3.7658e-02, -3.7658e-02,
        -1.0000e+09, -3.7658e-02

Tokens embeddings:
tensor([-1.0346e-01, -4.1641e-01, -1.0000e+09, -1.3080e-01, -3.8338e-01,
        -1.0000e+09, -2.1756e-02, -4.1957e-01, -1.0000e+09, -3.1766e-02,
         7.9894e-02, -1.0000e+09, -9.3719e-02, -3.7188e-01, -1.0000e+09,
        -5.1321e-02, -1.8946e-01, -1.0000e+09, -4.0138e-02, -3.7658e-02,
        -1.0000e+09, -3.7658e-02, -3.7658e-02, -1.0000e+09, -3.7658e-02,
        -3.7658e-02, -1.0000e+09, -3.7658e-02, -3.7658e-02, -3.7658e-02,
        -3.7658e-02, -3.7658e-02, -3.7658e-02, -3.7658e-02, -3.7658e-02,
        -1.0000e+09, -3.7658e-02, -3.7658e-02, -1.0000e+09, -3.7658e-02,
        -3.7658e-02, -1.0000e+09, -3.7658e-02, -3.7658e-02, -1.0000e+09,
        -3.7658e-02, -3.7658e-02, -1.0000e+09, -3.7658e-02, -3.7658e-02,
        -1.0000e+09, -3.7658e-02, -3.7658e-02, -1.0000e+09, -3.7658e-02,
        -3.7658e-02, -1.0000e+09, -3.7658e-02, -3.7658e-02, -1.0000e+09,
        -3.7658e-02, -3.7658e-02, -1.0000e+09, -3.7658e-02, -3.7658e-02,
        -1.0000e+09, -3.7658e-02

Tokens embeddings:
tensor([-1.0346e-01, -4.1641e-01, -1.0000e+09, -1.3080e-01, -3.8338e-01,
        -1.0000e+09, -2.1756e-02, -4.1957e-01, -1.0000e+09, -3.1766e-02,
         7.9894e-02, -1.0000e+09, -9.3719e-02, -3.7188e-01, -1.0000e+09,
        -5.1321e-02, -1.8946e-01, -1.0000e+09, -4.0138e-02, -3.7658e-02,
        -1.0000e+09, -3.7658e-02, -3.7658e-02, -1.0000e+09, -3.7658e-02,
        -3.7658e-02, -1.0000e+09, -3.7658e-02, -3.7658e-02, -3.7658e-02,
        -3.7658e-02, -3.7658e-02, -3.7658e-02, -3.7658e-02, -3.7658e-02,
        -1.0000e+09, -3.7658e-02, -3.7658e-02, -1.0000e+09, -3.7658e-02,
        -3.7658e-02, -1.0000e+09, -3.7658e-02, -3.7658e-02, -1.0000e+09,
        -3.7658e-02, -3.7658e-02, -1.0000e+09, -3.7658e-02, -3.7658e-02,
        -1.0000e+09, -3.7658e-02, -3.7658e-02, -1.0000e+09, -3.7658e-02,
        -3.7658e-02, -1.0000e+09, -3.7658e-02, -3.7658e-02, -1.0000e+09,
        -3.7658e-02, -3.7658e-02, -1.0000e+09, -3.7658e-02, -3.7658e-02,
        -1.0000e+09, -3.7658e-02

Tokens embeddings:
tensor([-1.0346e-01, -4.1641e-01, -1.0000e+09, -1.3080e-01, -3.8338e-01,
        -1.0000e+09, -2.1756e-02, -4.1957e-01, -1.0000e+09, -3.1766e-02,
         7.9894e-02, -1.0000e+09, -9.3719e-02, -3.7188e-01, -1.0000e+09,
        -5.1321e-02, -1.8946e-01, -1.0000e+09, -4.0138e-02, -3.7658e-02,
        -1.0000e+09, -3.7658e-02, -3.7658e-02, -1.0000e+09, -3.7658e-02,
        -3.7658e-02, -1.0000e+09, -3.7658e-02, -3.7658e-02, -3.7658e-02,
        -3.7658e-02, -3.7658e-02, -3.7658e-02, -3.7658e-02, -3.7658e-02,
        -1.0000e+09, -3.7658e-02, -3.7658e-02, -1.0000e+09, -3.7658e-02,
        -3.7658e-02, -1.0000e+09, -3.7658e-02, -3.7658e-02, -1.0000e+09,
        -3.7658e-02, -3.7658e-02, -1.0000e+09, -3.7658e-02, -3.7658e-02,
        -1.0000e+09, -3.7658e-02, -3.7658e-02, -1.0000e+09, -3.7658e-02,
        -3.7658e-02, -1.0000e+09, -3.7658e-02, -3.7658e-02, -1.0000e+09,
        -3.7658e-02, -3.7658e-02, -1.0000e+09, -3.7658e-02, -3.7658e-02,
        -1.0000e+09, -3.7658e-02

Tokens embeddings:
tensor([-1.0346e-01, -4.1641e-01, -1.0000e+09, -1.3080e-01, -3.8338e-01,
        -1.0000e+09, -2.1756e-02, -4.1957e-01, -1.0000e+09, -3.1766e-02,
         7.9894e-02, -1.0000e+09, -9.3719e-02, -3.7188e-01, -1.0000e+09,
        -5.1321e-02, -1.8946e-01, -1.0000e+09, -4.0138e-02, -3.7658e-02,
        -1.0000e+09, -3.7658e-02, -3.7658e-02, -1.0000e+09, -3.7658e-02,
        -3.7658e-02, -1.0000e+09, -3.7658e-02, -3.7658e-02, -3.7658e-02,
        -3.7658e-02, -3.7658e-02, -3.7658e-02, -3.7658e-02, -3.7658e-02,
        -1.0000e+09, -3.7658e-02, -3.7658e-02, -1.0000e+09, -3.7658e-02,
        -3.7658e-02, -1.0000e+09, -3.7658e-02, -3.7658e-02, -1.0000e+09,
        -3.7658e-02, -3.7658e-02, -1.0000e+09, -3.7658e-02, -3.7658e-02,
        -1.0000e+09, -3.7658e-02, -3.7658e-02, -1.0000e+09, -3.7658e-02,
        -3.7658e-02, -1.0000e+09, -3.7658e-02, -3.7658e-02, -1.0000e+09,
        -3.7658e-02, -3.7658e-02, -1.0000e+09, -3.7658e-02, -3.7658e-02,
        -1.0000e+09, -3.7658e-02

Tokens embeddings:
tensor([-1.0346e-01, -4.1641e-01, -1.0000e+09, -1.3080e-01, -3.8338e-01,
        -1.0000e+09, -2.1756e-02, -4.1957e-01, -1.0000e+09, -3.1766e-02,
         7.9894e-02, -1.0000e+09, -9.3719e-02, -3.7188e-01, -1.0000e+09,
        -5.1321e-02, -1.8946e-01, -1.0000e+09, -4.0138e-02, -3.7658e-02,
        -1.0000e+09, -3.7658e-02, -3.7658e-02, -1.0000e+09, -3.7658e-02,
        -3.7658e-02, -1.0000e+09, -3.7658e-02, -3.7658e-02, -3.7658e-02,
        -3.7658e-02, -3.7658e-02, -3.7658e-02, -3.7658e-02, -3.7658e-02,
        -1.0000e+09, -3.7658e-02, -3.7658e-02, -1.0000e+09, -3.7658e-02,
        -3.7658e-02, -1.0000e+09, -3.7658e-02, -3.7658e-02, -1.0000e+09,
        -3.7658e-02, -3.7658e-02, -1.0000e+09, -3.7658e-02, -3.7658e-02,
        -1.0000e+09, -3.7658e-02, -3.7658e-02, -1.0000e+09, -3.7658e-02,
        -3.7658e-02, -1.0000e+09, -3.7658e-02, -3.7658e-02, -1.0000e+09,
        -3.7658e-02, -3.7658e-02, -1.0000e+09, -3.7658e-02, -3.7658e-02,
        -1.0000e+09, -3.7658e-02

Tokens embeddings:
tensor([-1.0346e-01, -4.1641e-01, -1.0000e+09, -1.3080e-01, -3.8338e-01,
        -1.0000e+09, -2.1756e-02, -4.1957e-01, -1.0000e+09, -3.1766e-02,
         7.9894e-02, -1.0000e+09, -9.3719e-02, -3.7188e-01, -1.0000e+09,
        -5.1321e-02, -1.8946e-01, -1.0000e+09, -4.0138e-02, -3.7658e-02,
        -1.0000e+09, -3.7658e-02, -3.7658e-02, -1.0000e+09, -3.7658e-02,
        -3.7658e-02, -1.0000e+09, -3.7658e-02, -3.7658e-02, -3.7658e-02,
        -3.7658e-02, -3.7658e-02, -3.7658e-02, -3.7658e-02, -3.7658e-02,
        -1.0000e+09, -3.7658e-02, -3.7658e-02, -1.0000e+09, -3.7658e-02,
        -3.7658e-02, -1.0000e+09, -3.7658e-02, -3.7658e-02, -1.0000e+09,
        -3.7658e-02, -3.7658e-02, -1.0000e+09, -3.7658e-02, -3.7658e-02,
        -1.0000e+09, -3.7658e-02, -3.7658e-02, -1.0000e+09, -3.7658e-02,
        -3.7658e-02, -1.0000e+09, -3.7658e-02, -3.7658e-02, -1.0000e+09,
        -3.7658e-02, -3.7658e-02, -1.0000e+09, -3.7658e-02, -3.7658e-02,
        -1.0000e+09, -3.7658e-02

Tokens embeddings:
tensor([-1.0346e-01, -4.1641e-01, -1.0000e+09, -1.3080e-01, -3.8338e-01,
        -1.0000e+09, -2.1756e-02, -4.1957e-01, -1.0000e+09, -3.1766e-02,
         7.9894e-02, -1.0000e+09, -9.3719e-02, -3.7188e-01, -1.0000e+09,
        -5.1321e-02, -1.8946e-01, -1.0000e+09, -4.0138e-02, -3.7658e-02,
        -1.0000e+09, -3.7658e-02, -3.7658e-02, -1.0000e+09, -3.7658e-02,
        -3.7658e-02, -1.0000e+09, -3.7658e-02, -3.7658e-02, -3.7658e-02,
        -3.7658e-02, -3.7658e-02, -3.7658e-02, -3.7658e-02, -3.7658e-02,
        -1.0000e+09, -3.7658e-02, -3.7658e-02, -1.0000e+09, -3.7658e-02,
        -3.7658e-02, -1.0000e+09, -3.7658e-02, -3.7658e-02, -1.0000e+09,
        -3.7658e-02, -3.7658e-02, -1.0000e+09, -3.7658e-02, -3.7658e-02,
        -1.0000e+09, -3.7658e-02, -3.7658e-02, -1.0000e+09, -3.7658e-02,
        -3.7658e-02, -1.0000e+09, -3.7658e-02, -3.7658e-02, -1.0000e+09,
        -3.7658e-02, -3.7658e-02, -1.0000e+09, -3.7658e-02, -3.7658e-02,
        -1.0000e+09, -3.7658e-02

Tokens embeddings:
tensor([-1.0346e-01, -5.0954e-01, -1.0000e+09,  1.8556e-01, -3.8111e-01,
        -1.0000e+09, -7.6559e-02, -1.5043e-01, -1.0000e+09, -1.9591e-02,
        -4.2835e-01, -1.0000e+09,  7.9337e-02, -4.3756e-01, -1.0000e+09,
         3.5890e-03, -1.7587e-01, -1.4492e-01, -6.3356e-02, -9.7549e-02,
        -1.3126e-01, -2.1756e-02,  1.0879e-01, -1.0000e+09, -4.0138e-02,
        -3.7658e-02, -1.0000e+09, -3.7658e-02, -3.7658e-02, -3.7658e-02,
        -3.7658e-02, -3.7658e-02, -1.0000e+09, -3.7658e-02, -3.7658e-02,
        -1.0000e+09, -3.7658e-02, -3.7658e-02, -1.0000e+09, -3.7658e-02,
        -3.7658e-02, -1.0000e+09, -3.7658e-02, -3.7658e-02, -1.0000e+09,
        -3.7658e-02, -3.7658e-02, -1.0000e+09, -3.7658e-02, -3.7658e-02,
        -1.0000e+09, -3.7658e-02, -3.7658e-02, -1.0000e+09, -3.7658e-02,
        -3.7658e-02, -1.0000e+09, -3.7658e-02, -3.7658e-02, -1.0000e+09,
        -3.7658e-02, -3.7658e-02, -1.0000e+09, -3.7658e-02, -3.7658e-02,
        -1.0000e+09, -3.7658e-02

Tokens embeddings:
tensor([-1.0346e-01, -5.0954e-01, -1.0000e+09,  1.8556e-01, -3.8111e-01,
        -1.0000e+09, -7.6559e-02, -1.5043e-01, -1.0000e+09, -1.9591e-02,
        -4.2835e-01, -1.0000e+09,  7.9337e-02, -4.3756e-01, -1.0000e+09,
         3.5890e-03, -1.7587e-01, -1.4492e-01, -6.3356e-02, -9.7549e-02,
        -1.3126e-01, -2.1756e-02,  1.0879e-01, -1.0000e+09, -4.0138e-02,
        -3.7658e-02, -1.0000e+09, -3.7658e-02, -3.7658e-02, -3.7658e-02,
        -3.7658e-02, -3.7658e-02, -1.0000e+09, -3.7658e-02, -3.7658e-02,
        -1.0000e+09, -3.7658e-02, -3.7658e-02, -1.0000e+09, -3.7658e-02,
        -3.7658e-02, -1.0000e+09, -3.7658e-02, -3.7658e-02, -1.0000e+09,
        -3.7658e-02, -3.7658e-02, -1.0000e+09, -3.7658e-02, -3.7658e-02,
        -1.0000e+09, -3.7658e-02, -3.7658e-02, -1.0000e+09, -3.7658e-02,
        -3.7658e-02, -1.0000e+09, -3.7658e-02, -3.7658e-02, -1.0000e+09,
        -3.7658e-02, -3.7658e-02, -1.0000e+09, -3.7658e-02, -3.7658e-02,
        -1.0000e+09, -3.7658e-02

Tokens embeddings:
tensor([-1.0346e-01, -5.0954e-01, -1.0000e+09,  1.8556e-01, -3.8111e-01,
        -1.0000e+09, -7.6559e-02, -1.5043e-01, -1.0000e+09, -1.9591e-02,
        -4.2835e-01, -1.0000e+09,  7.9337e-02, -4.3756e-01, -1.0000e+09,
         3.5890e-03, -1.7587e-01, -1.4492e-01, -6.3356e-02, -9.7549e-02,
        -1.3126e-01, -2.1756e-02,  1.0879e-01, -1.0000e+09, -4.0138e-02,
        -3.7658e-02, -1.0000e+09, -3.7658e-02, -3.7658e-02, -3.7658e-02,
        -3.7658e-02, -3.7658e-02, -1.0000e+09, -3.7658e-02, -3.7658e-02,
        -1.0000e+09, -3.7658e-02, -3.7658e-02, -1.0000e+09, -3.7658e-02,
        -3.7658e-02, -1.0000e+09, -3.7658e-02, -3.7658e-02, -1.0000e+09,
        -3.7658e-02, -3.7658e-02, -1.0000e+09, -3.7658e-02, -3.7658e-02,
        -1.0000e+09, -3.7658e-02, -3.7658e-02, -1.0000e+09, -3.7658e-02,
        -3.7658e-02, -1.0000e+09, -3.7658e-02, -3.7658e-02, -1.0000e+09,
        -3.7658e-02, -3.7658e-02, -1.0000e+09, -3.7658e-02, -3.7658e-02,
        -1.0000e+09, -3.7658e-02

Tokens embeddings:
tensor([-1.0346e-01, -5.0954e-01, -1.0000e+09,  1.8556e-01, -3.8111e-01,
        -1.0000e+09, -7.6559e-02, -1.5043e-01, -1.0000e+09, -1.9591e-02,
        -4.2835e-01, -1.0000e+09,  7.9337e-02, -4.3756e-01, -1.0000e+09,
         3.5890e-03, -1.7587e-01, -1.4492e-01, -6.3356e-02, -9.7549e-02,
        -1.3126e-01, -2.1756e-02,  1.0879e-01, -1.0000e+09, -4.0138e-02,
        -3.7658e-02, -1.0000e+09, -3.7658e-02, -3.7658e-02, -3.7658e-02,
        -3.7658e-02, -3.7658e-02, -1.0000e+09, -3.7658e-02, -3.7658e-02,
        -1.0000e+09, -3.7658e-02, -3.7658e-02, -1.0000e+09, -3.7658e-02,
        -3.7658e-02, -1.0000e+09, -3.7658e-02, -3.7658e-02, -1.0000e+09,
        -3.7658e-02, -3.7658e-02, -1.0000e+09, -3.7658e-02, -3.7658e-02,
        -1.0000e+09, -3.7658e-02, -3.7658e-02, -1.0000e+09, -3.7658e-02,
        -3.7658e-02, -1.0000e+09, -3.7658e-02, -3.7658e-02, -1.0000e+09,
        -3.7658e-02, -3.7658e-02, -1.0000e+09, -3.7658e-02, -3.7658e-02,
        -1.0000e+09, -3.7658e-02

Tokens embeddings:
tensor([-1.0346e-01, -5.0954e-01, -1.0000e+09,  1.8556e-01, -3.8111e-01,
        -1.0000e+09, -7.6559e-02, -1.5043e-01, -1.0000e+09, -1.9591e-02,
        -4.2835e-01, -1.0000e+09,  7.9337e-02, -4.3756e-01, -1.0000e+09,
         3.5890e-03, -1.7587e-01, -1.4492e-01, -6.3356e-02, -9.7549e-02,
        -1.3126e-01, -2.1756e-02,  1.0879e-01, -1.0000e+09, -4.0138e-02,
        -3.7658e-02, -1.0000e+09, -3.7658e-02, -3.7658e-02, -3.7658e-02,
        -3.7658e-02, -3.7658e-02, -1.0000e+09, -3.7658e-02, -3.7658e-02,
        -1.0000e+09, -3.7658e-02, -3.7658e-02, -1.0000e+09, -3.7658e-02,
        -3.7658e-02, -1.0000e+09, -3.7658e-02, -3.7658e-02, -1.0000e+09,
        -3.7658e-02, -3.7658e-02, -1.0000e+09, -3.7658e-02, -3.7658e-02,
        -1.0000e+09, -3.7658e-02, -3.7658e-02, -1.0000e+09, -3.7658e-02,
        -3.7658e-02, -1.0000e+09, -3.7658e-02, -3.7658e-02, -1.0000e+09,
        -3.7658e-02, -3.7658e-02, -1.0000e+09, -3.7658e-02, -3.7658e-02,
        -1.0000e+09, -3.7658e-02

Tokens embeddings:
tensor([-1.0346e-01, -5.0954e-01, -1.0000e+09,  1.8556e-01, -3.8111e-01,
        -1.0000e+09, -7.6559e-02, -1.5043e-01, -1.0000e+09, -1.9591e-02,
        -4.2835e-01, -1.0000e+09,  7.9337e-02, -4.3756e-01, -1.0000e+09,
         3.5890e-03, -1.7587e-01, -1.4492e-01, -6.3356e-02, -9.7549e-02,
        -1.3126e-01, -2.1756e-02,  1.0879e-01, -1.0000e+09, -4.0138e-02,
        -3.7658e-02, -1.0000e+09, -3.7658e-02, -3.7658e-02, -3.7658e-02,
        -3.7658e-02, -3.7658e-02, -1.0000e+09, -3.7658e-02, -3.7658e-02,
        -1.0000e+09, -3.7658e-02, -3.7658e-02, -1.0000e+09, -3.7658e-02,
        -3.7658e-02, -1.0000e+09, -3.7658e-02, -3.7658e-02, -1.0000e+09,
        -3.7658e-02, -3.7658e-02, -1.0000e+09, -3.7658e-02, -3.7658e-02,
        -1.0000e+09, -3.7658e-02, -3.7658e-02, -1.0000e+09, -3.7658e-02,
        -3.7658e-02, -1.0000e+09, -3.7658e-02, -3.7658e-02, -1.0000e+09,
        -3.7658e-02, -3.7658e-02, -1.0000e+09, -3.7658e-02, -3.7658e-02,
        -1.0000e+09, -3.7658e-02

Tokens embeddings:
tensor([-1.0346e-01, -5.0954e-01, -1.0000e+09,  1.8556e-01, -3.8111e-01,
        -1.0000e+09, -7.6559e-02, -1.5043e-01, -1.0000e+09, -1.9591e-02,
        -4.2835e-01, -1.0000e+09,  7.9337e-02, -4.3756e-01, -1.0000e+09,
         3.5890e-03, -1.7587e-01, -1.4492e-01, -6.3356e-02, -9.7549e-02,
        -1.3126e-01, -2.1756e-02,  1.0879e-01, -1.0000e+09, -4.0138e-02,
        -3.7658e-02, -1.0000e+09, -3.7658e-02, -3.7658e-02, -3.7658e-02,
        -3.7658e-02, -3.7658e-02, -1.0000e+09, -3.7658e-02, -3.7658e-02,
        -1.0000e+09, -3.7658e-02, -3.7658e-02, -1.0000e+09, -3.7658e-02,
        -3.7658e-02, -1.0000e+09, -3.7658e-02, -3.7658e-02, -1.0000e+09,
        -3.7658e-02, -3.7658e-02, -1.0000e+09, -3.7658e-02, -3.7658e-02,
        -1.0000e+09, -3.7658e-02, -3.7658e-02, -1.0000e+09, -3.7658e-02,
        -3.7658e-02, -1.0000e+09, -3.7658e-02, -3.7658e-02, -1.0000e+09,
        -3.7658e-02, -3.7658e-02, -1.0000e+09, -3.7658e-02, -3.7658e-02,
        -1.0000e+09, -3.7658e-02

Tokens embeddings:
tensor([-1.0346e-01, -5.0954e-01, -1.0000e+09,  1.8556e-01, -3.8111e-01,
        -1.0000e+09, -7.6559e-02, -1.5043e-01, -1.0000e+09, -1.9591e-02,
        -4.2835e-01, -1.0000e+09,  7.9337e-02, -4.3756e-01, -1.0000e+09,
         3.5890e-03, -1.7587e-01, -1.4492e-01, -6.3356e-02, -9.7549e-02,
        -1.3126e-01, -2.1756e-02,  1.0879e-01, -1.0000e+09, -4.0138e-02,
        -3.7658e-02, -1.0000e+09, -3.7658e-02, -3.7658e-02, -3.7658e-02,
        -3.7658e-02, -3.7658e-02, -1.0000e+09, -3.7658e-02, -3.7658e-02,
        -1.0000e+09, -3.7658e-02, -3.7658e-02, -1.0000e+09, -3.7658e-02,
        -3.7658e-02, -1.0000e+09, -3.7658e-02, -3.7658e-02, -1.0000e+09,
        -3.7658e-02, -3.7658e-02, -1.0000e+09, -3.7658e-02, -3.7658e-02,
        -1.0000e+09, -3.7658e-02, -3.7658e-02, -1.0000e+09, -3.7658e-02,
        -3.7658e-02, -1.0000e+09, -3.7658e-02, -3.7658e-02, -1.0000e+09,
        -3.7658e-02, -3.7658e-02, -1.0000e+09, -3.7658e-02, -3.7658e-02,
        -1.0000e+09, -3.7658e-02

Tokens embeddings:
tensor([-1.0346e-01, -5.0954e-01, -1.0000e+09,  1.8556e-01, -3.8111e-01,
        -1.0000e+09, -7.6559e-02, -1.5043e-01, -1.0000e+09, -1.9591e-02,
        -4.2835e-01, -1.0000e+09,  7.9337e-02, -4.3756e-01, -1.0000e+09,
         3.5890e-03, -1.7587e-01, -1.4492e-01, -6.3356e-02, -9.7549e-02,
        -1.3126e-01, -2.1756e-02,  1.0879e-01, -1.0000e+09, -4.0138e-02,
        -3.7658e-02, -1.0000e+09, -3.7658e-02, -3.7658e-02, -3.7658e-02,
        -3.7658e-02, -3.7658e-02, -1.0000e+09, -3.7658e-02, -3.7658e-02,
        -1.0000e+09, -3.7658e-02, -3.7658e-02, -1.0000e+09, -3.7658e-02,
        -3.7658e-02, -1.0000e+09, -3.7658e-02, -3.7658e-02, -1.0000e+09,
        -3.7658e-02, -3.7658e-02, -1.0000e+09, -3.7658e-02, -3.7658e-02,
        -1.0000e+09, -3.7658e-02, -3.7658e-02, -1.0000e+09, -3.7658e-02,
        -3.7658e-02, -1.0000e+09, -3.7658e-02, -3.7658e-02, -1.0000e+09,
        -3.7658e-02, -3.7658e-02, -1.0000e+09, -3.7658e-02, -3.7658e-02,
        -1.0000e+09, -3.7658e-02

Tokens embeddings:
tensor([-1.0346e-01, -5.0954e-01, -1.0000e+09,  1.8556e-01, -3.8111e-01,
        -1.0000e+09, -7.6559e-02, -1.5043e-01, -1.0000e+09, -1.9591e-02,
        -4.2835e-01, -1.0000e+09,  7.9337e-02, -4.3756e-01, -1.0000e+09,
         3.5890e-03, -1.7587e-01, -1.4492e-01, -6.3356e-02, -9.7549e-02,
        -1.3126e-01, -2.1756e-02,  1.0879e-01, -1.0000e+09, -4.0138e-02,
        -3.7658e-02, -1.0000e+09, -3.7658e-02, -3.7658e-02, -3.7658e-02,
        -3.7658e-02, -3.7658e-02, -1.0000e+09, -3.7658e-02, -3.7658e-02,
        -1.0000e+09, -3.7658e-02, -3.7658e-02, -1.0000e+09, -3.7658e-02,
        -3.7658e-02, -1.0000e+09, -3.7658e-02, -3.7658e-02, -1.0000e+09,
        -3.7658e-02, -3.7658e-02, -1.0000e+09, -3.7658e-02, -3.7658e-02,
        -1.0000e+09, -3.7658e-02, -3.7658e-02, -1.0000e+09, -3.7658e-02,
        -3.7658e-02, -1.0000e+09, -3.7658e-02, -3.7658e-02, -1.0000e+09,
        -3.7658e-02, -3.7658e-02, -1.0000e+09, -3.7658e-02, -3.7658e-02,
        -1.0000e+09, -3.7658e-02

Tokens embeddings:
tensor([-1.0346e-01, -5.0954e-01, -1.0000e+09,  1.8556e-01, -3.8111e-01,
        -1.0000e+09, -7.6559e-02, -1.5043e-01, -1.0000e+09, -1.9591e-02,
        -4.2835e-01, -1.0000e+09,  7.9337e-02, -4.3756e-01, -1.0000e+09,
         3.5890e-03, -1.7587e-01, -1.4492e-01, -6.3356e-02, -9.7549e-02,
        -1.3126e-01, -2.1756e-02,  1.0879e-01, -1.0000e+09, -4.0138e-02,
        -3.7658e-02, -1.0000e+09, -3.7658e-02, -3.7658e-02, -3.7658e-02,
        -3.7658e-02, -3.7658e-02, -1.0000e+09, -3.7658e-02, -3.7658e-02,
        -1.0000e+09, -3.7658e-02, -3.7658e-02, -1.0000e+09, -3.7658e-02,
        -3.7658e-02, -1.0000e+09, -3.7658e-02, -3.7658e-02, -1.0000e+09,
        -3.7658e-02, -3.7658e-02, -1.0000e+09, -3.7658e-02, -3.7658e-02,
        -1.0000e+09, -3.7658e-02, -3.7658e-02, -1.0000e+09, -3.7658e-02,
        -3.7658e-02, -1.0000e+09, -3.7658e-02, -3.7658e-02, -1.0000e+09,
        -3.7658e-02, -3.7658e-02, -1.0000e+09, -3.7658e-02, -3.7658e-02,
        -1.0000e+09, -3.7658e-02

Tokens embeddings:
tensor([-1.0346e-01, -5.0954e-01, -1.0000e+09,  1.8556e-01, -3.8111e-01,
        -1.0000e+09, -7.6559e-02, -1.5043e-01, -1.0000e+09, -1.9591e-02,
        -4.2835e-01, -1.0000e+09,  7.9337e-02, -4.3756e-01, -1.0000e+09,
         3.5890e-03, -1.7587e-01, -1.4492e-01, -6.3356e-02, -9.7549e-02,
        -1.3126e-01, -2.1756e-02,  1.0879e-01, -1.0000e+09, -4.0138e-02,
        -3.7658e-02, -1.0000e+09, -3.7658e-02, -3.7658e-02, -3.7658e-02,
        -3.7658e-02, -3.7658e-02, -1.0000e+09, -3.7658e-02, -3.7658e-02,
        -1.0000e+09, -3.7658e-02, -3.7658e-02, -1.0000e+09, -3.7658e-02,
        -3.7658e-02, -1.0000e+09, -3.7658e-02, -3.7658e-02, -1.0000e+09,
        -3.7658e-02, -3.7658e-02, -1.0000e+09, -3.7658e-02, -3.7658e-02,
        -1.0000e+09, -3.7658e-02, -3.7658e-02, -1.0000e+09, -3.7658e-02,
        -3.7658e-02, -1.0000e+09, -3.7658e-02, -3.7658e-02, -1.0000e+09,
        -3.7658e-02, -3.7658e-02, -1.0000e+09, -3.7658e-02, -3.7658e-02,
        -1.0000e+09, -3.7658e-02

Tokens embeddings:
tensor([-1.0346e-01, -4.2066e-01, -1.0000e+09, -1.5350e-01, -5.4984e-02,
        -1.0000e+09, -1.2903e-01, -4.1832e-01, -1.0000e+09, -1.2564e-01,
        -4.3748e-01, -1.0000e+09, -1.4712e-01, -1.1936e-01, -1.2967e-01,
        -1.5357e-01, -3.9005e-01, -1.0000e+09, -1.3009e-01, -4.3500e-01,
        -1.0000e+09, -1.7178e-01, -4.3475e-01, -1.0000e+09, -1.7709e-01,
        -3.7560e-01, -1.0000e+09, -8.5240e-02, -1.2196e-01, -1.0000e+09,
        -2.1756e-02, -3.2250e-01, -1.0000e+09, -1.3398e-01, -4.1832e-01,
        -1.0000e+09, -1.2990e-01, -4.2534e-01, -1.0000e+09, -1.4842e-01,
        -4.0188e-03, -1.0000e+09, -1.7587e-01, -3.2976e-01, -1.0000e+09,
         1.8556e-01, -4.0138e-02, -1.0000e+09, -3.7658e-02, -3.7658e-02,
        -1.0000e+09, -3.7658e-02, -3.7658e-02, -1.0000e+09, -3.7658e-02,
        -3.7658e-02, -1.0000e+09, -3.7658e-02, -3.7658e-02, -1.0000e+09,
        -3.7658e-02, -3.7658e-02, -1.0000e+09, -3.7658e-02, -3.7658e-02,
        -1.0000e+09, -3.7658e-02

Tokens embeddings:
tensor([-1.0346e-01, -4.2066e-01, -1.0000e+09, -1.5350e-01, -5.4984e-02,
        -1.0000e+09, -1.2903e-01, -4.1832e-01, -1.0000e+09, -1.2564e-01,
        -4.3748e-01, -1.0000e+09, -1.4712e-01, -1.1936e-01, -1.2967e-01,
        -1.5357e-01, -3.9005e-01, -1.0000e+09, -1.3009e-01, -4.3500e-01,
        -1.0000e+09, -1.7178e-01, -4.3475e-01, -1.0000e+09, -1.7709e-01,
        -3.7560e-01, -1.0000e+09, -8.5240e-02, -1.2196e-01, -1.0000e+09,
        -2.1756e-02, -3.2250e-01, -1.0000e+09, -1.3398e-01, -4.1832e-01,
        -1.0000e+09, -1.2990e-01, -4.2534e-01, -1.0000e+09, -1.4842e-01,
        -4.0188e-03, -1.0000e+09, -1.7587e-01, -3.2976e-01, -1.0000e+09,
         1.8556e-01, -4.0138e-02, -1.0000e+09, -3.7658e-02, -3.7658e-02,
        -1.0000e+09, -3.7658e-02, -3.7658e-02, -1.0000e+09, -3.7658e-02,
        -3.7658e-02, -1.0000e+09, -3.7658e-02, -3.7658e-02, -1.0000e+09,
        -3.7658e-02, -3.7658e-02, -1.0000e+09, -3.7658e-02, -3.7658e-02,
        -1.0000e+09, -3.7658e-02

Tokens embeddings:
tensor([-1.0346e-01, -4.2066e-01, -1.0000e+09, -1.5350e-01, -5.4984e-02,
        -1.0000e+09, -1.2903e-01, -4.1832e-01, -1.0000e+09, -1.2564e-01,
        -4.3748e-01, -1.0000e+09, -1.4712e-01, -1.1936e-01, -1.2967e-01,
        -1.5357e-01, -3.9005e-01, -1.0000e+09, -1.3009e-01, -4.3500e-01,
        -1.0000e+09, -1.7178e-01, -4.3475e-01, -1.0000e+09, -1.7709e-01,
        -3.7560e-01, -1.0000e+09, -8.5240e-02, -1.2196e-01, -1.0000e+09,
        -2.1756e-02, -3.2250e-01, -1.0000e+09, -1.3398e-01, -4.1832e-01,
        -1.0000e+09, -1.2990e-01, -4.2534e-01, -1.0000e+09, -1.4842e-01,
        -4.0188e-03, -1.0000e+09, -1.7587e-01, -3.2976e-01, -1.0000e+09,
         1.8556e-01, -4.0138e-02, -1.0000e+09, -3.7658e-02, -3.7658e-02,
        -1.0000e+09, -3.7658e-02, -3.7658e-02, -1.0000e+09, -3.7658e-02,
        -3.7658e-02, -1.0000e+09, -3.7658e-02, -3.7658e-02, -1.0000e+09,
        -3.7658e-02, -3.7658e-02, -1.0000e+09, -3.7658e-02, -3.7658e-02,
        -1.0000e+09, -3.7658e-02

Tokens embeddings:
tensor([-1.0346e-01, -4.2066e-01, -1.0000e+09, -1.5350e-01, -5.4984e-02,
        -1.0000e+09, -1.2903e-01, -4.1832e-01, -1.0000e+09, -1.2564e-01,
        -4.3748e-01, -1.0000e+09, -1.4712e-01, -1.1936e-01, -1.2967e-01,
        -1.5357e-01, -3.9005e-01, -1.0000e+09, -1.3009e-01, -4.3500e-01,
        -1.0000e+09, -1.7178e-01, -4.3475e-01, -1.0000e+09, -1.7709e-01,
        -3.7560e-01, -1.0000e+09, -8.5240e-02, -1.2196e-01, -1.0000e+09,
        -2.1756e-02, -3.2250e-01, -1.0000e+09, -1.3398e-01, -4.1832e-01,
        -1.0000e+09, -1.2990e-01, -4.2534e-01, -1.0000e+09, -1.4842e-01,
        -4.0188e-03, -1.0000e+09, -1.7587e-01, -3.2976e-01, -1.0000e+09,
         1.8556e-01, -4.0138e-02, -1.0000e+09, -3.7658e-02, -3.7658e-02,
        -1.0000e+09, -3.7658e-02, -3.7658e-02, -1.0000e+09, -3.7658e-02,
        -3.7658e-02, -1.0000e+09, -3.7658e-02, -3.7658e-02, -1.0000e+09,
        -3.7658e-02, -3.7658e-02, -1.0000e+09, -3.7658e-02, -3.7658e-02,
        -1.0000e+09, -3.7658e-02

Tokens embeddings:
tensor([-1.0346e-01, -4.2066e-01, -1.0000e+09, -1.5350e-01, -5.4984e-02,
        -1.0000e+09, -1.2903e-01, -4.1832e-01, -1.0000e+09, -1.2564e-01,
        -4.3748e-01, -1.0000e+09, -1.4712e-01, -1.1936e-01, -1.2967e-01,
        -1.5357e-01, -3.9005e-01, -1.0000e+09, -1.3009e-01, -4.3500e-01,
        -1.0000e+09, -1.7178e-01, -4.3475e-01, -1.0000e+09, -1.7709e-01,
        -3.7560e-01, -1.0000e+09, -8.5240e-02, -1.2196e-01, -1.0000e+09,
        -2.1756e-02, -3.2250e-01, -1.0000e+09, -1.3398e-01, -4.1832e-01,
        -1.0000e+09, -1.2990e-01, -4.2534e-01, -1.0000e+09, -1.4842e-01,
        -4.0188e-03, -1.0000e+09, -1.7587e-01, -3.2976e-01, -1.0000e+09,
         1.8556e-01, -4.0138e-02, -1.0000e+09, -3.7658e-02, -3.7658e-02,
        -1.0000e+09, -3.7658e-02, -3.7658e-02, -1.0000e+09, -3.7658e-02,
        -3.7658e-02, -1.0000e+09, -3.7658e-02, -3.7658e-02, -1.0000e+09,
        -3.7658e-02, -3.7658e-02, -1.0000e+09, -3.7658e-02, -3.7658e-02,
        -1.0000e+09, -3.7658e-02

Tokens embeddings:
tensor([-1.0346e-01, -4.2066e-01, -1.0000e+09, -1.5350e-01, -5.4984e-02,
        -1.0000e+09, -1.2903e-01, -4.1832e-01, -1.0000e+09, -1.2564e-01,
        -4.3748e-01, -1.0000e+09, -1.4712e-01, -1.1936e-01, -1.2967e-01,
        -1.5357e-01, -3.9005e-01, -1.0000e+09, -1.3009e-01, -4.3500e-01,
        -1.0000e+09, -1.7178e-01, -4.3475e-01, -1.0000e+09, -1.7709e-01,
        -3.7560e-01, -1.0000e+09, -8.5240e-02, -1.2196e-01, -1.0000e+09,
        -2.1756e-02, -3.2250e-01, -1.0000e+09, -1.3398e-01, -4.1832e-01,
        -1.0000e+09, -1.2990e-01, -4.2534e-01, -1.0000e+09, -1.4842e-01,
        -4.0188e-03, -1.0000e+09, -1.7587e-01, -3.2976e-01, -1.0000e+09,
         1.8556e-01, -4.0138e-02, -1.0000e+09, -3.7658e-02, -3.7658e-02,
        -1.0000e+09, -3.7658e-02, -3.7658e-02, -1.0000e+09, -3.7658e-02,
        -3.7658e-02, -1.0000e+09, -3.7658e-02, -3.7658e-02, -1.0000e+09,
        -3.7658e-02, -3.7658e-02, -1.0000e+09, -3.7658e-02, -3.7658e-02,
        -1.0000e+09, -3.7658e-02

Tokens embeddings:
tensor([-1.0346e-01, -4.2066e-01, -1.0000e+09, -1.5350e-01, -5.4984e-02,
        -1.0000e+09, -1.2903e-01, -4.1832e-01, -1.0000e+09, -1.2564e-01,
        -4.3748e-01, -1.0000e+09, -1.4712e-01, -1.1936e-01, -1.2967e-01,
        -1.5357e-01, -3.9005e-01, -1.0000e+09, -1.3009e-01, -4.3500e-01,
        -1.0000e+09, -1.7178e-01, -4.3475e-01, -1.0000e+09, -1.7709e-01,
        -3.7560e-01, -1.0000e+09, -8.5240e-02, -1.2196e-01, -1.0000e+09,
        -2.1756e-02, -3.2250e-01, -1.0000e+09, -1.3398e-01, -4.1832e-01,
        -1.0000e+09, -1.2990e-01, -4.2534e-01, -1.0000e+09, -1.4842e-01,
        -4.0188e-03, -1.0000e+09, -1.7587e-01, -3.2976e-01, -1.0000e+09,
         1.8556e-01, -4.0138e-02, -1.0000e+09, -3.7658e-02, -3.7658e-02,
        -1.0000e+09, -3.7658e-02, -3.7658e-02, -1.0000e+09, -3.7658e-02,
        -3.7658e-02, -1.0000e+09, -3.7658e-02, -3.7658e-02, -1.0000e+09,
        -3.7658e-02, -3.7658e-02, -1.0000e+09, -3.7658e-02, -3.7658e-02,
        -1.0000e+09, -3.7658e-02

Tokens embeddings:
tensor([-1.0346e-01, -4.2066e-01, -1.0000e+09, -1.5350e-01, -5.4984e-02,
        -1.0000e+09, -1.2903e-01, -4.1832e-01, -1.0000e+09, -1.2564e-01,
        -4.3748e-01, -1.0000e+09, -1.4712e-01, -1.1936e-01, -1.2967e-01,
        -1.5357e-01, -3.9005e-01, -1.0000e+09, -1.3009e-01, -4.3500e-01,
        -1.0000e+09, -1.7178e-01, -4.3475e-01, -1.0000e+09, -1.7709e-01,
        -3.7560e-01, -1.0000e+09, -8.5240e-02, -1.2196e-01, -1.0000e+09,
        -2.1756e-02, -3.2250e-01, -1.0000e+09, -1.3398e-01, -4.1832e-01,
        -1.0000e+09, -1.2990e-01, -4.2534e-01, -1.0000e+09, -1.4842e-01,
        -4.0188e-03, -1.0000e+09, -1.7587e-01, -3.2976e-01, -1.0000e+09,
         1.8556e-01, -4.0138e-02, -1.0000e+09, -3.7658e-02, -3.7658e-02,
        -1.0000e+09, -3.7658e-02, -3.7658e-02, -1.0000e+09, -3.7658e-02,
        -3.7658e-02, -1.0000e+09, -3.7658e-02, -3.7658e-02, -1.0000e+09,
        -3.7658e-02, -3.7658e-02, -1.0000e+09, -3.7658e-02, -3.7658e-02,
        -1.0000e+09, -3.7658e-02

Tokens embeddings:
tensor([-1.0346e-01, -4.2066e-01, -1.0000e+09, -1.5350e-01, -5.4984e-02,
        -1.0000e+09, -1.2903e-01, -4.1832e-01, -1.0000e+09, -1.2564e-01,
        -4.3748e-01, -1.0000e+09, -1.4712e-01, -1.1936e-01, -1.2967e-01,
        -1.5357e-01, -3.9005e-01, -1.0000e+09, -1.3009e-01, -4.3500e-01,
        -1.0000e+09, -1.7178e-01, -4.3475e-01, -1.0000e+09, -1.7709e-01,
        -3.7560e-01, -1.0000e+09, -8.5240e-02, -1.2196e-01, -1.0000e+09,
        -2.1756e-02, -3.2250e-01, -1.0000e+09, -1.3398e-01, -4.1832e-01,
        -1.0000e+09, -1.2990e-01, -4.2534e-01, -1.0000e+09, -1.4842e-01,
        -4.0188e-03, -1.0000e+09, -1.7587e-01, -3.2976e-01, -1.0000e+09,
         1.8556e-01, -4.0138e-02, -1.0000e+09, -3.7658e-02, -3.7658e-02,
        -1.0000e+09, -3.7658e-02, -3.7658e-02, -1.0000e+09, -3.7658e-02,
        -3.7658e-02, -1.0000e+09, -3.7658e-02, -3.7658e-02, -1.0000e+09,
        -3.7658e-02, -3.7658e-02, -1.0000e+09, -3.7658e-02, -3.7658e-02,
        -1.0000e+09, -3.7658e-02

Tokens embeddings:
tensor([-1.0346e-01, -4.2066e-01, -1.0000e+09, -1.5350e-01, -5.4984e-02,
        -1.0000e+09, -1.2903e-01, -4.1832e-01, -1.0000e+09, -1.2564e-01,
        -4.3748e-01, -1.0000e+09, -1.4712e-01, -1.1936e-01, -1.2967e-01,
        -1.5357e-01, -3.9005e-01, -1.0000e+09, -1.3009e-01, -4.3500e-01,
        -1.0000e+09, -1.7178e-01, -4.3475e-01, -1.0000e+09, -1.7709e-01,
        -3.7560e-01, -1.0000e+09, -8.5240e-02, -1.2196e-01, -1.0000e+09,
        -2.1756e-02, -3.2250e-01, -1.0000e+09, -1.3398e-01, -4.1832e-01,
        -1.0000e+09, -1.2990e-01, -4.2534e-01, -1.0000e+09, -1.4842e-01,
        -4.0188e-03, -1.0000e+09, -1.7587e-01, -3.2976e-01, -1.0000e+09,
         1.8556e-01, -4.0138e-02, -1.0000e+09, -3.7658e-02, -3.7658e-02,
        -1.0000e+09, -3.7658e-02, -3.7658e-02, -1.0000e+09, -3.7658e-02,
        -3.7658e-02, -1.0000e+09, -3.7658e-02, -3.7658e-02, -1.0000e+09,
        -3.7658e-02, -3.7658e-02, -1.0000e+09, -3.7658e-02, -3.7658e-02,
        -1.0000e+09, -3.7658e-02

Tokens embeddings:
tensor([-1.0346e-01, -4.2066e-01, -1.0000e+09, -1.5350e-01, -5.4984e-02,
        -1.0000e+09, -1.2903e-01, -4.1832e-01, -1.0000e+09, -1.2564e-01,
        -4.3748e-01, -1.0000e+09, -1.4712e-01, -1.1936e-01, -1.2967e-01,
        -1.5357e-01, -3.9005e-01, -1.0000e+09, -1.3009e-01, -4.3500e-01,
        -1.0000e+09, -1.7178e-01, -4.3475e-01, -1.0000e+09, -1.7709e-01,
        -3.7560e-01, -1.0000e+09, -8.5240e-02, -1.2196e-01, -1.0000e+09,
        -2.1756e-02, -3.2250e-01, -1.0000e+09, -1.3398e-01, -4.1832e-01,
        -1.0000e+09, -1.2990e-01, -4.2534e-01, -1.0000e+09, -1.4842e-01,
        -4.0188e-03, -1.0000e+09, -1.7587e-01, -3.2976e-01, -1.0000e+09,
         1.8556e-01, -4.0138e-02, -1.0000e+09, -3.7658e-02, -3.7658e-02,
        -1.0000e+09, -3.7658e-02, -3.7658e-02, -1.0000e+09, -3.7658e-02,
        -3.7658e-02, -1.0000e+09, -3.7658e-02, -3.7658e-02, -1.0000e+09,
        -3.7658e-02, -3.7658e-02, -1.0000e+09, -3.7658e-02, -3.7658e-02,
        -1.0000e+09, -3.7658e-02

Tokens embeddings:
tensor([-1.0346e-01, -4.2066e-01, -1.0000e+09, -1.5350e-01, -5.4984e-02,
        -1.0000e+09, -1.2903e-01, -4.1832e-01, -1.0000e+09, -1.2564e-01,
        -4.3748e-01, -1.0000e+09, -1.4712e-01, -1.1936e-01, -1.2967e-01,
        -1.5357e-01, -3.9005e-01, -1.0000e+09, -1.3009e-01, -4.3500e-01,
        -1.0000e+09, -1.7178e-01, -4.3475e-01, -1.0000e+09, -1.7709e-01,
        -3.7560e-01, -1.0000e+09, -8.5240e-02, -1.2196e-01, -1.0000e+09,
        -2.1756e-02, -3.2250e-01, -1.0000e+09, -1.3398e-01, -4.1832e-01,
        -1.0000e+09, -1.2990e-01, -4.2534e-01, -1.0000e+09, -1.4842e-01,
        -4.0188e-03, -1.0000e+09, -1.7587e-01, -3.2976e-01, -1.0000e+09,
         1.8556e-01, -4.0138e-02, -1.0000e+09, -3.7658e-02, -3.7658e-02,
        -1.0000e+09, -3.7658e-02, -3.7658e-02, -1.0000e+09, -3.7658e-02,
        -3.7658e-02, -1.0000e+09, -3.7658e-02, -3.7658e-02, -1.0000e+09,
        -3.7658e-02, -3.7658e-02, -1.0000e+09, -3.7658e-02, -3.7658e-02,
        -1.0000e+09, -3.7658e-02

Tokens embeddings:
tensor([-0.1035, -0.0218, -0.1113, -0.0802, -0.1120, -0.0040, -0.1396,  0.0668,
        -0.0896, -0.1288, -0.0520, -0.2044, -0.0750, -0.0780, -0.0040, -0.1321,
        -0.1625, -0.1383, -0.1126, -0.1715, -0.0448, -0.0040, -0.1433, -0.0871,
        -0.1383,  0.1856, -0.1342, -0.0448, -0.0558,  0.1856, -0.0401, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377])
arg max of tensor([-0.1035, -0.0218, -0.1113, -0.0802, -0.1120, -0.0040, -0.1396,  0.0668,
        -0.0896, -0.1288, -0.0520, -0.2044, -0.0750, -0.0780, -0.0040, -

Tokens embeddings:
tensor([-0.1035, -0.0218, -0.1113, -0.0802, -0.1120, -0.0040, -0.1396,  0.0668,
        -0.0896, -0.1288, -0.0520, -0.2044, -0.0750, -0.0780, -0.0040, -0.1321,
        -0.1625, -0.1383, -0.1126, -0.1715, -0.0448, -0.0040, -0.1433, -0.0871,
        -0.1383,  0.1856, -0.1342, -0.0448, -0.0558,  0.1856, -0.0401, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377])
arg max of tensor([-0.1035, -0.0218, -0.1113, -0.0802, -0.1120, -0.0040, -0.1396,  0.0668,
        -0.0896, -0.1288, -0.0520, -0.2044, -0.0750, -0.0780, -0.0040, -

Tokens embeddings:
tensor([-0.1035, -0.0218, -0.1113, -0.0802, -0.1120, -0.0040, -0.1396,  0.0668,
        -0.0896, -0.1288, -0.0520, -0.2044, -0.0750, -0.0780, -0.0040, -0.1321,
        -0.1625, -0.1383, -0.1126, -0.1715, -0.0448, -0.0040, -0.1433, -0.0871,
        -0.1383,  0.1856, -0.1342, -0.0448, -0.0558,  0.1856, -0.0401, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377])
arg max of tensor([-0.1035, -0.0218, -0.1113, -0.0802, -0.1120, -0.0040, -0.1396,  0.0668,
        -0.0896, -0.1288, -0.0520, -0.2044, -0.0750, -0.0780, -0.0040, -

Tokens embeddings:
tensor([-0.1035, -0.0218, -0.1113, -0.0802, -0.1120, -0.0040, -0.1396,  0.0668,
        -0.0896, -0.1288, -0.0520, -0.2044, -0.0750, -0.0780, -0.0040, -0.1321,
        -0.1625, -0.1383, -0.1126, -0.1715, -0.0448, -0.0040, -0.1433, -0.0871,
        -0.1383,  0.1856, -0.1342, -0.0448, -0.0558,  0.1856, -0.0401, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377])
arg max of tensor([-0.1035, -0.0218, -0.1113, -0.0802, -0.1120, -0.0040, -0.1396,  0.0668,
        -0.0896, -0.1288, -0.0520, -0.2044, -0.0750, -0.0780, -0.0040, -

Tokens embeddings:
tensor([-0.1035, -0.0218, -0.1113, -0.0802, -0.1120, -0.0040, -0.1396,  0.0668,
        -0.0896, -0.1288, -0.0520, -0.2044, -0.0750, -0.0780, -0.0040, -0.1321,
        -0.1625, -0.1383, -0.1126, -0.1715, -0.0448, -0.0040, -0.1433, -0.0871,
        -0.1383,  0.1856, -0.1342, -0.0448, -0.0558,  0.1856, -0.0401, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377])
arg max of tensor([-0.1035, -0.0218, -0.1113, -0.0802, -0.1120, -0.0040, -0.1396,  0.0668,
        -0.0896, -0.1288, -0.0520, -0.2044, -0.0750, -0.0780, -0.0040, -

Tokens embeddings:
tensor([-0.1035, -0.0218, -0.1113, -0.0802, -0.1120, -0.0040, -0.1396,  0.0668,
        -0.0896, -0.1288, -0.0520, -0.2044, -0.0750, -0.0780, -0.0040, -0.1321,
        -0.1625, -0.1383, -0.1126, -0.1715, -0.0448, -0.0040, -0.1433, -0.0871,
        -0.1383,  0.1856, -0.1342, -0.0448, -0.0558,  0.1856, -0.0401, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377])
arg max of tensor([-0.1035, -0.0218, -0.1113, -0.0802, -0.1120, -0.0040, -0.1396,  0.0668,
        -0.0896, -0.1288, -0.0520, -0.2044, -0.0750, -0.0780, -0.0040, -

Tokens embeddings:
tensor([-0.1035, -0.0218, -0.1113, -0.0802, -0.1120, -0.0040, -0.1396,  0.0668,
        -0.0896, -0.1288, -0.0520, -0.2044, -0.0750, -0.0780, -0.0040, -0.1321,
        -0.1625, -0.1383, -0.1126, -0.1715, -0.0448, -0.0040, -0.1433, -0.0871,
        -0.1383,  0.1856, -0.1342, -0.0448, -0.0558,  0.1856, -0.0401, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377])
arg max of tensor([-0.1035, -0.0218, -0.1113, -0.0802, -0.1120, -0.0040, -0.1396,  0.0668,
        -0.0896, -0.1288, -0.0520, -0.2044, -0.0750, -0.0780, -0.0040, -

Tokens embeddings:
tensor([-0.1035, -0.0218, -0.1113, -0.0802, -0.1120, -0.0040, -0.1396,  0.0668,
        -0.0896, -0.1288, -0.0520, -0.2044, -0.0750, -0.0780, -0.0040, -0.1321,
        -0.1625, -0.1383, -0.1126, -0.1715, -0.0448, -0.0040, -0.1433, -0.0871,
        -0.1383,  0.1856, -0.1342, -0.0448, -0.0558,  0.1856, -0.0401, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377])
arg max of tensor([-0.1035, -0.0218, -0.1113, -0.0802, -0.1120, -0.0040, -0.1396,  0.0668,
        -0.0896, -0.1288, -0.0520, -0.2044, -0.0750, -0.0780, -0.0040, -

Tokens embeddings:
tensor([-0.1035, -0.0218, -0.1113, -0.0802, -0.1120, -0.0040, -0.1396,  0.0668,
        -0.0896, -0.1288, -0.0520, -0.2044, -0.0750, -0.0780, -0.0040, -0.1321,
        -0.1625, -0.1383, -0.1126, -0.1715, -0.0448, -0.0040, -0.1433, -0.0871,
        -0.1383,  0.1856, -0.1342, -0.0448, -0.0558,  0.1856, -0.0401, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377])
arg max of tensor([-0.1035, -0.0218, -0.1113, -0.0802, -0.1120, -0.0040, -0.1396,  0.0668,
        -0.0896, -0.1288, -0.0520, -0.2044, -0.0750, -0.0780, -0.0040, -

Tokens embeddings:
tensor([-0.1035, -0.0218, -0.1113, -0.0802, -0.1120, -0.0040, -0.1396,  0.0668,
        -0.0896, -0.1288, -0.0520, -0.2044, -0.0750, -0.0780, -0.0040, -0.1321,
        -0.1625, -0.1383, -0.1126, -0.1715, -0.0448, -0.0040, -0.1433, -0.0871,
        -0.1383,  0.1856, -0.1342, -0.0448, -0.0558,  0.1856, -0.0401, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377])
arg max of tensor([-0.1035, -0.0218, -0.1113, -0.0802, -0.1120, -0.0040, -0.1396,  0.0668,
        -0.0896, -0.1288, -0.0520, -0.2044, -0.0750, -0.0780, -0.0040, -

Tokens embeddings:
tensor([-0.1035, -0.0218, -0.1113, -0.0802, -0.1120, -0.0040, -0.1396,  0.0668,
        -0.0896, -0.1288, -0.0520, -0.2044, -0.0750, -0.0780, -0.0040, -0.1321,
        -0.1625, -0.1383, -0.1126, -0.1715, -0.0448, -0.0040, -0.1433, -0.0871,
        -0.1383,  0.1856, -0.1342, -0.0448, -0.0558,  0.1856, -0.0401, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377])
arg max of tensor([-0.1035, -0.0218, -0.1113, -0.0802, -0.1120, -0.0040, -0.1396,  0.0668,
        -0.0896, -0.1288, -0.0520, -0.2044, -0.0750, -0.0780, -0.0040, -

Tokens embeddings:
tensor([-0.1035, -0.0218, -0.1113, -0.0802, -0.1120, -0.0040, -0.1396,  0.0668,
        -0.0896, -0.1288, -0.0520, -0.2044, -0.0750, -0.0780, -0.0040, -0.1321,
        -0.1625, -0.1383, -0.1126, -0.1715, -0.0448, -0.0040, -0.1433, -0.0871,
        -0.1383,  0.1856, -0.1342, -0.0448, -0.0558,  0.1856, -0.0401, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377])
arg max of tensor([-0.1035, -0.0218, -0.1113, -0.0802, -0.1120, -0.0040, -0.1396,  0.0668,
        -0.0896, -0.1288, -0.0520, -0.2044, -0.0750, -0.0780, -0.0040, -

Tokens embeddings:
tensor([-0.1035, -0.0521, -0.0928, -0.1282, -0.1412, -0.1920, -0.0871, -0.0218,
        -0.0921,  0.1856, -0.0401, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377])
arg max of tensor([-0.1035, -0.0521, -0.0928, -0.1282, -0.1412, -0.1920, -0.0871, -0.0218,
        -0.0921,  0.1856, -0.0401, -0.0377, -0.0377, -0.0377, -0.0377, -

Tokens embeddings:
tensor([-0.1035, -0.0521, -0.0928, -0.1282, -0.1412, -0.1920, -0.0871, -0.0218,
        -0.0921,  0.1856, -0.0401, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377])
arg max of tensor([-0.1035, -0.0521, -0.0928, -0.1282, -0.1412, -0.1920, -0.0871, -0.0218,
        -0.0921,  0.1856, -0.0401, -0.0377, -0.0377, -0.0377, -0.0377, -

Tokens embeddings:
tensor([-0.1035, -0.0521, -0.0928, -0.1282, -0.1412, -0.1920, -0.0871, -0.0218,
        -0.0921,  0.1856, -0.0401, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377])
arg max of tensor([-0.1035, -0.0521, -0.0928, -0.1282, -0.1412, -0.1920, -0.0871, -0.0218,
        -0.0921,  0.1856, -0.0401, -0.0377, -0.0377, -0.0377, -0.0377, -

Tokens embeddings:
tensor([-0.1035, -0.0521, -0.0928, -0.1282, -0.1412, -0.1920, -0.0871, -0.0218,
        -0.0921,  0.1856, -0.0401, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377])
arg max of tensor([-0.1035, -0.0521, -0.0928, -0.1282, -0.1412, -0.1920, -0.0871, -0.0218,
        -0.0921,  0.1856, -0.0401, -0.0377, -0.0377, -0.0377, -0.0377, -

Tokens embeddings:
tensor([-0.1035, -0.0521, -0.0928, -0.1282, -0.1412, -0.1920, -0.0871, -0.0218,
        -0.0921,  0.1856, -0.0401, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377])
arg max of tensor([-0.1035, -0.0521, -0.0928, -0.1282, -0.1412, -0.1920, -0.0871, -0.0218,
        -0.0921,  0.1856, -0.0401, -0.0377, -0.0377, -0.0377, -0.0377, -

Tokens embeddings:
tensor([-0.1035, -0.0521, -0.0928, -0.1282, -0.1412, -0.1920, -0.0871, -0.0218,
        -0.0921,  0.1856, -0.0401, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377])
arg max of tensor([-0.1035, -0.0521, -0.0928, -0.1282, -0.1412, -0.1920, -0.0871, -0.0218,
        -0.0921,  0.1856, -0.0401, -0.0377, -0.0377, -0.0377, -0.0377, -

Tokens embeddings:
tensor([-0.1035, -0.0521, -0.0928, -0.1282, -0.1412, -0.1920, -0.0871, -0.0218,
        -0.0921,  0.1856, -0.0401, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377])
arg max of tensor([-0.1035, -0.0521, -0.0928, -0.1282, -0.1412, -0.1920, -0.0871, -0.0218,
        -0.0921,  0.1856, -0.0401, -0.0377, -0.0377, -0.0377, -0.0377, -

Tokens embeddings:
tensor([-0.1035, -0.0521, -0.0928, -0.1282, -0.1412, -0.1920, -0.0871, -0.0218,
        -0.0921,  0.1856, -0.0401, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377])
arg max of tensor([-0.1035, -0.0521, -0.0928, -0.1282, -0.1412, -0.1920, -0.0871, -0.0218,
        -0.0921,  0.1856, -0.0401, -0.0377, -0.0377, -0.0377, -0.0377, -

Tokens embeddings:
tensor([-0.1035, -0.0521, -0.0928, -0.1282, -0.1412, -0.1920, -0.0871, -0.0218,
        -0.0921,  0.1856, -0.0401, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377])
arg max of tensor([-0.1035, -0.0521, -0.0928, -0.1282, -0.1412, -0.1920, -0.0871, -0.0218,
        -0.0921,  0.1856, -0.0401, -0.0377, -0.0377, -0.0377, -0.0377, -

Tokens embeddings:
tensor([-0.1035, -0.0521, -0.0928, -0.1282, -0.1412, -0.1920, -0.0871, -0.0218,
        -0.0921,  0.1856, -0.0401, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377])
arg max of tensor([-0.1035, -0.0521, -0.0928, -0.1282, -0.1412, -0.1920, -0.0871, -0.0218,
        -0.0921,  0.1856, -0.0401, -0.0377, -0.0377, -0.0377, -0.0377, -

Tokens embeddings:
tensor([-0.1035, -0.0521, -0.0928, -0.1282, -0.1412, -0.1920, -0.0871, -0.0218,
        -0.0921,  0.1856, -0.0401, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377])
arg max of tensor([-0.1035, -0.0521, -0.0928, -0.1282, -0.1412, -0.1920, -0.0871, -0.0218,
        -0.0921,  0.1856, -0.0401, -0.0377, -0.0377, -0.0377, -0.0377, -

Tokens embeddings:
tensor([-0.1035, -0.0521, -0.0928, -0.1282, -0.1412, -0.1920, -0.0871, -0.0218,
        -0.0921,  0.1856, -0.0401, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377])
arg max of tensor([-0.1035, -0.0521, -0.0928, -0.1282, -0.1412, -0.1920, -0.0871, -0.0218,
        -0.0921,  0.1856, -0.0401, -0.0377, -0.0377, -0.0377, -0.0377, -

Tokens embeddings:
tensor([-0.1035, -0.0856, -0.0856, -0.1209, -0.1594, -0.0934, -0.0954, -0.0879,
        -0.0525, -0.0538, -0.1209, -0.0229, -0.1595, -0.0126, -0.0782, -0.0526,
        -0.1407, -0.0218,  0.2349, -0.1771, -0.1775, -0.1484, -0.0802, -0.1433,
        -0.0040, -0.1759, -0.1759, -0.0795, -0.1347,  0.1856, -0.0401, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377])
arg max of tensor([-0.1035, -0.0856, -0.0856, -0.1209, -0.1594, -0.0934, -0.0954, -0.0879,
        -0.0525, -0.0538, -0.1209, -0.0229, -0.1595, -0.0126, -0.0782, -

Tokens embeddings:
tensor([-0.1035, -0.0856, -0.0856, -0.1209, -0.1594, -0.0934, -0.0954, -0.0879,
        -0.0525, -0.0538, -0.1209, -0.0229, -0.1595, -0.0126, -0.0782, -0.0526,
        -0.1407, -0.0218,  0.2349, -0.1771, -0.1775, -0.1484, -0.0802, -0.1433,
        -0.0040, -0.1759, -0.1759, -0.0795, -0.1347,  0.1856, -0.0401, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377])
arg max of tensor([-0.1035, -0.0856, -0.0856, -0.1209, -0.1594, -0.0934, -0.0954, -0.0879,
        -0.0525, -0.0538, -0.1209, -0.0229, -0.1595, -0.0126, -0.0782, -

Tokens embeddings:
tensor([-0.1035, -0.0856, -0.0856, -0.1209, -0.1594, -0.0934, -0.0954, -0.0879,
        -0.0525, -0.0538, -0.1209, -0.0229, -0.1595, -0.0126, -0.0782, -0.0526,
        -0.1407, -0.0218,  0.2349, -0.1771, -0.1775, -0.1484, -0.0802, -0.1433,
        -0.0040, -0.1759, -0.1759, -0.0795, -0.1347,  0.1856, -0.0401, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377])
arg max of tensor([-0.1035, -0.0856, -0.0856, -0.1209, -0.1594, -0.0934, -0.0954, -0.0879,
        -0.0525, -0.0538, -0.1209, -0.0229, -0.1595, -0.0126, -0.0782, -

Tokens embeddings:
tensor([-0.1035, -0.0856, -0.0856, -0.1209, -0.1594, -0.0934, -0.0954, -0.0879,
        -0.0525, -0.0538, -0.1209, -0.0229, -0.1595, -0.0126, -0.0782, -0.0526,
        -0.1407, -0.0218,  0.2349, -0.1771, -0.1775, -0.1484, -0.0802, -0.1433,
        -0.0040, -0.1759, -0.1759, -0.0795, -0.1347,  0.1856, -0.0401, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377])
arg max of tensor([-0.1035, -0.0856, -0.0856, -0.1209, -0.1594, -0.0934, -0.0954, -0.0879,
        -0.0525, -0.0538, -0.1209, -0.0229, -0.1595, -0.0126, -0.0782, -

Tokens embeddings:
tensor([-0.1035, -0.0856, -0.0856, -0.1209, -0.1594, -0.0934, -0.0954, -0.0879,
        -0.0525, -0.0538, -0.1209, -0.0229, -0.1595, -0.0126, -0.0782, -0.0526,
        -0.1407, -0.0218,  0.2349, -0.1771, -0.1775, -0.1484, -0.0802, -0.1433,
        -0.0040, -0.1759, -0.1759, -0.0795, -0.1347,  0.1856, -0.0401, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377])
arg max of tensor([-0.1035, -0.0856, -0.0856, -0.1209, -0.1594, -0.0934, -0.0954, -0.0879,
        -0.0525, -0.0538, -0.1209, -0.0229, -0.1595, -0.0126, -0.0782, -

Tokens embeddings:
tensor([-0.1035, -0.0856, -0.0856, -0.1209, -0.1594, -0.0934, -0.0954, -0.0879,
        -0.0525, -0.0538, -0.1209, -0.0229, -0.1595, -0.0126, -0.0782, -0.0526,
        -0.1407, -0.0218,  0.2349, -0.1771, -0.1775, -0.1484, -0.0802, -0.1433,
        -0.0040, -0.1759, -0.1759, -0.0795, -0.1347,  0.1856, -0.0401, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377])
arg max of tensor([-0.1035, -0.0856, -0.0856, -0.1209, -0.1594, -0.0934, -0.0954, -0.0879,
        -0.0525, -0.0538, -0.1209, -0.0229, -0.1595, -0.0126, -0.0782, -

Tokens embeddings:
tensor([-0.1035, -0.0856, -0.0856, -0.1209, -0.1594, -0.0934, -0.0954, -0.0879,
        -0.0525, -0.0538, -0.1209, -0.0229, -0.1595, -0.0126, -0.0782, -0.0526,
        -0.1407, -0.0218,  0.2349, -0.1771, -0.1775, -0.1484, -0.0802, -0.1433,
        -0.0040, -0.1759, -0.1759, -0.0795, -0.1347,  0.1856, -0.0401, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377])
arg max of tensor([-0.1035, -0.0856, -0.0856, -0.1209, -0.1594, -0.0934, -0.0954, -0.0879,
        -0.0525, -0.0538, -0.1209, -0.0229, -0.1595, -0.0126, -0.0782, -

Tokens embeddings:
tensor([-0.1035, -0.0856, -0.0856, -0.1209, -0.1594, -0.0934, -0.0954, -0.0879,
        -0.0525, -0.0538, -0.1209, -0.0229, -0.1595, -0.0126, -0.0782, -0.0526,
        -0.1407, -0.0218,  0.2349, -0.1771, -0.1775, -0.1484, -0.0802, -0.1433,
        -0.0040, -0.1759, -0.1759, -0.0795, -0.1347,  0.1856, -0.0401, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377])
arg max of tensor([-0.1035, -0.0856, -0.0856, -0.1209, -0.1594, -0.0934, -0.0954, -0.0879,
        -0.0525, -0.0538, -0.1209, -0.0229, -0.1595, -0.0126, -0.0782, -

Tokens embeddings:
tensor([-0.1035, -0.0856, -0.0856, -0.1209, -0.1594, -0.0934, -0.0954, -0.0879,
        -0.0525, -0.0538, -0.1209, -0.0229, -0.1595, -0.0126, -0.0782, -0.0526,
        -0.1407, -0.0218,  0.2349, -0.1771, -0.1775, -0.1484, -0.0802, -0.1433,
        -0.0040, -0.1759, -0.1759, -0.0795, -0.1347,  0.1856, -0.0401, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377])
arg max of tensor([-0.1035, -0.0856, -0.0856, -0.1209, -0.1594, -0.0934, -0.0954, -0.0879,
        -0.0525, -0.0538, -0.1209, -0.0229, -0.1595, -0.0126, -0.0782, -

Tokens embeddings:
tensor([-0.1035, -0.0856, -0.0856, -0.1209, -0.1594, -0.0934, -0.0954, -0.0879,
        -0.0525, -0.0538, -0.1209, -0.0229, -0.1595, -0.0126, -0.0782, -0.0526,
        -0.1407, -0.0218,  0.2349, -0.1771, -0.1775, -0.1484, -0.0802, -0.1433,
        -0.0040, -0.1759, -0.1759, -0.0795, -0.1347,  0.1856, -0.0401, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377])
arg max of tensor([-0.1035, -0.0856, -0.0856, -0.1209, -0.1594, -0.0934, -0.0954, -0.0879,
        -0.0525, -0.0538, -0.1209, -0.0229, -0.1595, -0.0126, -0.0782, -

Tokens embeddings:
tensor([-0.1035, -0.0856, -0.0856, -0.1209, -0.1594, -0.0934, -0.0954, -0.0879,
        -0.0525, -0.0538, -0.1209, -0.0229, -0.1595, -0.0126, -0.0782, -0.0526,
        -0.1407, -0.0218,  0.2349, -0.1771, -0.1775, -0.1484, -0.0802, -0.1433,
        -0.0040, -0.1759, -0.1759, -0.0795, -0.1347,  0.1856, -0.0401, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377])
arg max of tensor([-0.1035, -0.0856, -0.0856, -0.1209, -0.1594, -0.0934, -0.0954, -0.0879,
        -0.0525, -0.0538, -0.1209, -0.0229, -0.1595, -0.0126, -0.0782, -

Tokens embeddings:
tensor([-0.1035, -0.0856, -0.0856, -0.1209, -0.1594, -0.0934, -0.0954, -0.0879,
        -0.0525, -0.0538, -0.1209, -0.0229, -0.1595, -0.0126, -0.0782, -0.0526,
        -0.1407, -0.0218,  0.2349, -0.1771, -0.1775, -0.1484, -0.0802, -0.1433,
        -0.0040, -0.1759, -0.1759, -0.0795, -0.1347,  0.1856, -0.0401, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377, -0.0377,
        -0.0377])
arg max of tensor([-0.1035, -0.0856, -0.0856, -0.1209, -0.1594, -0.0934, -0.0954, -0.0879,
        -0.0525, -0.0538, -0.1209, -0.0229, -0.1595, -0.0126, -0.0782, -

In [34]:
# Pooling First token [CLS] for each sentence - argmax

# Select some sentences randomly
sent_index = [21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51,52,53,54,55,56,57,58,59,60,61,62,63,64]

for s in sent_index:
  print("*" * 100)
  # Get the sentence's words
  tokens = test_inputs[s]
  # For each layer...
  for l in range(12):
    print("\nLayer", l+1)
    attention = np.squeeze(test_attentions[s][l].detach().cpu().numpy(), axis=0)
    # and for each head
    #for h, head in enumerate(attention):
    #print("Head", h+1)
    # Get the attention for the cls token
    encoded_tokens = bert_tokenizer(tokens, padding=True, truncation=True, max_length=128, return_tensors='pt')
    encoded_tokens = encoded_tokens.to(device)
    with torch.no_grad():
      model_output1 = model_e(**encoded_tokens)
      tokens_embeddings = model_output1[0][:,0]
      tokens_embeddings = tokens_embeddings.cpu()
      cls_attentions = head[0]
      display(HTML(colorize(tokens, cls_attentions)))
      print("Tokens embeddings:")
      print(tokens_embeddings)
      arg = argmax(tokens_embeddings)
      print('arg max of %s: %d' % (tokens_embeddings, arg))

Output hidden; open in https://colab.research.google.com to view.

In [35]:
# Max Pooling for all tokens in sentences and argmax

# Select some sentences randomly
sent_index = [21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51,52,53,54,55,56,57,58,59,60,61,62,63,64]

for s in sent_index:
  print("*" * 100)
  # Get the sentence's words
  tokens = test_inputs[s]
  # For each layer...
  for l in range(12):
    print("\nLayer", l+1)
    attention = np.squeeze(test_attentions[s][l].detach().cpu().numpy(), axis=0)
    # and for each head
    #print("Head", h+1)
    # Get the attention for the cls token
    encoded_tokens = bert_tokenizer(tokens, truncation=True, padding=True, max_length=128, return_tensors='pt')
    encoded_tokens = encoded_tokens.to(device)
    with torch.no_grad():
      model_output1 = model_e(**encoded_tokens)
      tokens_embeddings = max_pooling(model_output1, encoded_tokens['attention_mask'])
      tokens_embeddings = tokens_embeddings.cpu()
      cls_attentions = head[0]
      display(HTML(colorize(tokens, cls_attentions)))
      print("Tokens embeddings:")
      print(tokens_embeddings)
      arg = argmax(tokens_embeddings)
      print('arg max of %s: %d' % (tokens_embeddings, arg))

Output hidden; open in https://colab.research.google.com to view.