In [1]:
from google.colab import drive
drive.mount('/content/drive/')

Mounted at /content/drive/


In [2]:
%%capture
!pip install transformers

In [3]:
import torch
import numpy as np
import pandas as pd
import torch.nn as nn

import transformers
from sklearn.metrics import *
from transformers import AdamW
from tqdm.notebook import tqdm
from scipy.special import softmax
from sklearn.metrics import classification_report
from sklearn.model_selection import train_test_split as tts
from transformers import BertTokenizerFast, BertConfig, BertForSequenceClassification, AutoModel
from torch.utils.data import TensorDataset, DataLoader, RandomSampler, SequentialSampler

In [4]:
# Define the device
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')

#### Download

In [5]:
cd drive/My Drive/Colab Notebooks/experiments

/content/drive/My Drive/Colab Notebooks/experiments


In [6]:
# Download humor detection data
# Paper: https://arxiv.org/abs/2004.12765
data = pd.read_csv("data/trofi.csv")
print("\nThere are", len(data), "sentences")

# Use the standard text/label columns
# Create labels: 1 --> humorous, 0 --> not humorous
data["label"] = data["label"].apply(int)
data.head()


There are 3737 sentences


Unnamed: 0,verb,sentence,verb_idx,label
0,absorb,An Energy Department spokesman says the sulfur...,22,0
1,absorb,The yellow beta carotene pigment absorbs blue ...,5,0
2,absorb,"This time , the ground absorbed the shock wave...",5,0
3,absorb,'' Vitamins could be passed right out of the b...,12,0
4,absorb,"As Eliot wrote : '' In a warm haze , the sultr...",14,0


#### Split to training, validation and test



In [7]:
# Use a subset for quick experiments
#subset_data = data[:10000]

# Split to train, val and test
train, test = tts(data[["sentence", "label"]], random_state=42, test_size=0.1)
train, val = tts(train, random_state=42, test_size=test.shape[0])

#### Tokenize and encode with BERT tokenizer

In [8]:
# Construct a BERT tokenizer based on WordPiece
bert_tokenizer = BertTokenizerFast.from_pretrained('bert-base-uncased')

HBox(children=(FloatProgress(value=0.0, description='Downloading', max=231508.0, style=ProgressStyle(descripti…




HBox(children=(FloatProgress(value=0.0, description='Downloading', max=466062.0, style=ProgressStyle(descripti…




HBox(children=(FloatProgress(value=0.0, description='Downloading', max=28.0, style=ProgressStyle(description_w…




In [9]:
# A sanity check of the tokenizer
encoded_instance = bert_tokenizer.batch_encode_plus([train.iloc[0].sentence], padding=True)
print(encoded_instance)

{'input_ids': [[101, 2061, 2057, 1005, 2128, 8040, 17686, 1996, 3663, 1010, 10209, 2067, 1010, 1998, 2035, 1997, 1037, 5573, 2023, 3124, 4152, 2023, 9577, 8460, 1010, 2059, 2515, 1037, 2440, 1011, 2006, 13297, 14257, 2121, 1010, 2059, 1037, 2440, 1011, 2006, 10236, 24490, 3013, 5963, 1010, 2059, 1037, 2440, 1011, 2006, 10245, 2128, 4765, 2854, 102]], 'token_type_ids': [[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]], 'attention_mask': [[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]]}


In [10]:
print("Original text:", train.iloc[0].sentence)
print("BERT BPEs:", bert_tokenizer.convert_ids_to_tokens(encoded_instance["input_ids"][0]))

Original text: So we 're scoping the situation , kicking back , and all of a sudden this guy gets this insane barrel , then does a full - on airplane floater , then a full - on wraparound cutback , then a full - on snap reentry 
BERT BPEs: ['[CLS]', 'so', 'we', "'", 're', 'sc', '##oping', 'the', 'situation', ',', 'kicking', 'back', ',', 'and', 'all', 'of', 'a', 'sudden', 'this', 'guy', 'gets', 'this', 'insane', 'barrel', ',', 'then', 'does', 'a', 'full', '-', 'on', 'airplane', 'float', '##er', ',', 'then', 'a', 'full', '-', 'on', 'wrap', '##around', 'cut', '##back', ',', 'then', 'a', 'full', '-', 'on', 'snap', 're', '##ent', '##ry', '[SEP]']


In [11]:
# Set max_len to the maximum length of the training data 
max_len = max([len(bert_tokenizer.encode(s)) for s in train.sentence.to_list()])
print("The maximum sentence length in training based on BERT BPEs is", max_len)

The maximum sentence length in training based on BERT BPEs is 112


In [12]:
# Tokenize and encode sentences in each set
x_train = bert_tokenizer.batch_encode_plus(
    train.sentence.tolist(),
    max_length = max_len,
    padding=True,
    truncation=True
)
x_val = bert_tokenizer.batch_encode_plus(
    val.sentence.tolist(),
    max_length = max_len,
    padding=True,
    truncation=True
)
x_test = bert_tokenizer.batch_encode_plus(
    test.sentence.tolist(),
    max_length = max_len,
    padding=True,
    truncation=True
)

In [13]:
# Convert lists to tensors in order to feed them to our PyTorch model
train_seq = torch.tensor(x_train['input_ids'])
train_mask = torch.tensor(x_train['attention_mask'])
train_y = torch.tensor(train.label.tolist())

val_seq = torch.tensor(x_val['input_ids'])
val_mask = torch.tensor(x_val['attention_mask'])
val_y = torch.tensor(val.label.tolist())

test_seq = torch.tensor(x_test['input_ids'])
test_mask = torch.tensor(x_test['attention_mask'])
test_y = torch.tensor(test.label.tolist())

In [14]:
batch_size = 32

# Create a dataloader for each set

# TensorDataset: Creates a PyTorch dataset object to load data from
train_data = TensorDataset(train_seq, train_mask, train_y)
# RandomSampler: specify the sequence of indices/keys used in data loading
train_sampler = RandomSampler(train_data)
# DataLoader: a Python iterable over a dataset
train_dataloader = DataLoader(train_data, sampler=train_sampler, batch_size=batch_size)

val_data = TensorDataset(val_seq, val_mask, val_y)
val_sampler = SequentialSampler(val_data)
val_dataloader = DataLoader(val_data, sampler=val_sampler, batch_size=batch_size)

test_data = TensorDataset(test_seq, test_mask, test_y)
test_sampler = SequentialSampler(test_data)
test_dataloader = DataLoader(test_data, sampler=test_sampler, batch_size=1)

## Build and train the model

In [15]:
# Define which BERT model to use
# We will use BERT base pre-trained on uncased text
model_name = "bert-base-uncased"
# The BertForSequenceClassification class creates a model with BERT and a classifier on top
# The classifier is a linear layer with two outputs (two is the default, if you have more labels change the config)
# It uses the CrossEntropyLoss from PyTorch
# from_pretrained() is used to load pre-trained weights
model = BertForSequenceClassification.from_pretrained(model_name, output_attentions=True)

HBox(children=(FloatProgress(value=0.0, description='Downloading', max=570.0, style=ProgressStyle(description_…




HBox(children=(FloatProgress(value=0.0, description='Downloading', max=440473133.0, style=ProgressStyle(descri…




Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.dense.bias', 'cls.seq_relationship.bias', 'cls.predictions.bias', 'cls.predictions.decoder.weight', 'cls.seq_relationship.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at

In [16]:
# Training method
def training():
  # Set to train mode
  model.train()
  total_loss, total_accuracy = 0, 0
  # Iterate through the training batches
  for batch in tqdm(train_dataloader, desc="Iteration"):    
    # Push the batch to gpu
    batch = [r.to(device) for r in batch] 
    sent_id, mask, labels = batch
    # Clear gradients 
    model.zero_grad()
    # Get model outputs
    outputs = model(sent_id, attention_mask=mask, labels=labels)
    # Get loss
    loss = outputs.loss
    # Add to the total loss
    total_loss = total_loss + loss
    # Backward pass to calculate the gradients
    loss.backward()
    # Update parameters
    optimizer.step()
  # Compute the training loss of the epoch
  epoch_loss = total_loss / len(train_dataloader)

  return epoch_loss

In [17]:
# Evaluation method
def evaluate():  
  print("\nEvaluating...")  
  # Set to eval mode
  model.eval()
  total_loss, total_accuracy = 0, 0
  predictions, targets = [], []
  # Iterate through the validation batches
  for batch in val_dataloader:
    # Push the batch to gpu
    batch = [t.to(device) for t in batch]
    sent_id, mask, labels = batch
    # Save the gold labels to use them for evaluation
    targets.extend(labels.detach().cpu().numpy())
    # Deactivate autograd
    with torch.no_grad():
      # Get model outputs
      outputs = model(sent_id, attention_mask=mask, labels=labels)
      # Get loss
      loss = outputs.loss
      total_loss = total_loss + loss
      # Apply softmax to the output of the model
      output_probs = softmax(outputs.logits.detach().cpu().numpy(), axis=1)
      # Get the index with the largest probability as the predicted label
      predictions.extend(np.argmax(output_probs, axis=1))
  # Compute the validation loss of the epoch
  epoch_loss = total_loss / len(val_dataloader)

  return epoch_loss, targets, predictions

In [18]:
# Push model to gpu
model = model.to(device)
# Define the optimizer and the learning rate
optimizer = AdamW(model.parameters(), lr = 2e-5)

best_val_loss = float('inf')
best_epoch = -1
train_losses=[]
val_losses=[]
epochs = 5
# Define the number of epochs to wait for early stopping
patience = 3

# Train the model
for epoch in range(epochs):     
  print('\n Epoch {:} / {:}'.format(epoch + 1, epochs))    
  train_loss = training()
  val_loss, val_targets, val_predictions = evaluate()

  train_losses.append(train_loss)
  val_losses.append(val_loss)

  print("\nTraining Loss:", train_loss)
  print("Validation Loss:", val_loss)
  # Calculate the validation F1 score for the current epoch
  f1 = f1_score(val_targets, val_predictions, average="binary")
  print("F1 score:", round(f1, 3))

  # Save the model with the best validation loss
  if val_loss < best_val_loss:
    best_val_loss = val_loss
    best_epoch = epoch
    torch.save(model.state_dict(), 'saved_weights.pt')

  # Early stopping
  if ((epoch - best_epoch) >= patience):
    print("No improvement in", patience, "epochs. Stopped training.")
    break



 Epoch 1 / 5


HBox(children=(FloatProgress(value=0.0, description='Iteration', max=94.0, style=ProgressStyle(description_wid…



Evaluating...

Training Loss: tensor(0.6666, device='cuda:0', grad_fn=<DivBackward0>)
Validation Loss: tensor(0.6648, device='cuda:0')
F1 score: 0.452

 Epoch 2 / 5


HBox(children=(FloatProgress(value=0.0, description='Iteration', max=94.0, style=ProgressStyle(description_wid…



Evaluating...

Training Loss: tensor(0.5860, device='cuda:0', grad_fn=<DivBackward0>)
Validation Loss: tensor(0.6363, device='cuda:0')
F1 score: 0.602

 Epoch 3 / 5


HBox(children=(FloatProgress(value=0.0, description='Iteration', max=94.0, style=ProgressStyle(description_wid…



Evaluating...

Training Loss: tensor(0.4085, device='cuda:0', grad_fn=<DivBackward0>)
Validation Loss: tensor(0.7090, device='cuda:0')
F1 score: 0.625

 Epoch 4 / 5


HBox(children=(FloatProgress(value=0.0, description='Iteration', max=94.0, style=ProgressStyle(description_wid…



Evaluating...

Training Loss: tensor(0.2112, device='cuda:0', grad_fn=<DivBackward0>)
Validation Loss: tensor(0.8468, device='cuda:0')
F1 score: 0.62

 Epoch 5 / 5


HBox(children=(FloatProgress(value=0.0, description='Iteration', max=94.0, style=ProgressStyle(description_wid…



Evaluating...

Training Loss: tensor(0.1278, device='cuda:0', grad_fn=<DivBackward0>)
Validation Loss: tensor(1.1329, device='cuda:0')
F1 score: 0.557
No improvement in 3 epochs. Stopped training.


In [19]:
# Save checkpoint to your drive
# Zip
#!zip saved_weights.zip  saved_weights.pt
# Mount
#from google.colab import drive
#drive.mount('/content/gdrive')
# Copy to your drive folder
#!cp -r saved_weights.zip /content/gdrive/MyDrive/

## Inference

#### Load the saved checkpoint

In [20]:
# Use this code to download the model saved in your drive 
# Add the id from the shareable link of the file 
# !gdown --id add_shareable_link_id
# !unzip saved_weights.zip

In [21]:
# Create the model
model_e = BertForSequenceClassification.from_pretrained("bert-base-uncased", output_attentions=True)
# Load pre-trained weights
#checkpoint = torch.load("saved_weights.pt", map_location="cpu")
# Add them to the model
#model_e.load_state_dict(checkpoint)
model_e = model_e.to(device)

Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.dense.bias', 'cls.seq_relationship.bias', 'cls.predictions.bias', 'cls.predictions.decoder.weight', 'cls.seq_relationship.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at

#### Get predictions for test

In [22]:
# Predict for the test set and save the results
model_e.eval()
test_predictions = []
test_targets = []
test_attentions = []
test_inputs = []

for batch in test_dataloader:
  batch = [t.to(device) for t in batch]
  sent_id, mask, labels = batch
  # Get gold labels
  test_targets.extend(labels.detach().cpu().numpy())
  # Get input words
  test_inputs.append(bert_tokenizer.convert_ids_to_tokens(sent_id.detach().cpu().numpy()[0]))
  with torch.no_grad():
    # Get predictions
    outputs = model_e(sent_id, attention_mask=mask)
    # Apply softmax to the outputs
    output_probs = softmax(outputs.logits.detach().cpu().numpy(), axis=1)
    # Get the with the highest probability as the predicted label
    test_predictions.extend(np.argmax(output_probs, axis=1))
    # Get attention weights
    # Attention weights from all layers are returned in a tuple
    # The weights from each layer are in a tensor with shape (batch_size, attention_heads, max_len, max_len)
    test_attentions.append(outputs.attentions)

#### Evaluate

In [23]:
print("F1:", f1_score(test_targets, test_predictions, average="binary"))
print("ACC:", accuracy_score(test_targets, test_predictions))
print("AUPR:", average_precision_score(test_targets, test_predictions))
print("PRECISION:", precision_score(test_targets, test_predictions))
print("RECALL:", recall_score(test_targets, test_predictions))
print("AUC:", roc_auc_score(test_targets, test_predictions))

F1: 0.5544147843942505
ACC: 0.4197860962566845
AUPR: 0.39800977684892314
PRECISION: 0.39823008849557523
RECALL: 0.9121621621621622
AUC: 0.5047536474527625


In [None]:
#max pooling to generate a fixed sized sentence embedding


#Max Pooling - Take the max value over time for every dimension
def max_pooling(model_output, attention_mask):
    token_embeddings = model_output[0] #First element of model_output contains all token embeddings
    input_mask_expanded = attention_mask.resize_(token_embeddings.size())
    #input_mask_expanded = attention_mask.unsqueeze(-1).expand(token_embeddings.size()).float()
    token_embeddings[input_mask_expanded == 0] = -1e9  # Set padding tokens to large negative value
    max_over_time = torch.max(token_embeddings, 1)[0]
    return max_over_time

def avg_pooling(model_output, attention_mask):
    token_embeddings = model_output[0] #First element of model_output contains all token embeddings
    input_mask_expanded = attention_mask.resize_(token_embeddings.size())
    #input_mask_expanded = attention_mask.unsqueeze(-1).expand(token_embeddings.size()).float()
    token_embeddings[input_mask_expanded == 0] = -1e9  # Set padding tokens to large negative value
    avg_over_time = torch.mean(token_embeddings, 1)[0]
    return avg_over_time


#Sentences we want sentence embeddings for
sentences = ['The', 'stars', 'gravitate', 'towards', 'each', 'other.']

#Tokenize sentences
encoded_input = bert_tokenizer(sentences, padding=True, truncation=True, max_length=128, return_tensors='pt')
encoded_input = encoded_input.to(device)

#Compute token embeddings
with torch.no_grad():
    model_output = model_e(**encoded_input)

#Perform pooling. In this case, max pooling
sentence_embeddings = max_pooling(model_output, encoded_input['attention_mask'])
#avg_sentence_embeddings = avg_pooling(model_output, encoded_input['attention_mask'])


print("Sentence embeddings:")
print(sentence_embeddings)

Sentence embeddings:
tensor([-3.5217e-01, -2.3322e-01, -1.9730e-01, -2.6875e-01, -1.0000e+09,
        -1.5048e-01], device='cuda:0')


In [None]:
# numpy implementation of argmax
from numpy import argmax

sentence_embeddings = sentence_embeddings.cpu()

# get argmax
result = argmax(sentence_embeddings)
print('arg max of %s: %d' % (sentence_embeddings, result))

arg max of tensor([-3.5217e-01, -2.3322e-01, -1.9730e-01, -2.6875e-01, -1.0000e+09,
        -1.5048e-01]): 5


In [None]:
#CLS token of each input represents the sentence embedding


#Sentences we want sentence embeddings for
sentences = ['The', 'stars', 'gravitate', 'towards', 'each', 'other']


#Tokenize sentences
encoded_input = bert_tokenizer(sentences, padding=True, truncation=True, max_length=128, return_tensors='pt')
encoded_input = encoded_input.to(device)

#Compute token embeddings
with torch.no_grad():
    model_output = model(**encoded_input)
    #model_output = model_output.to(device)
    
sentence_embeddings = model_output[0][:,0] #Take the first token ([CLS]) from each sentence 

print("Sentence embeddings:")
print(sentence_embeddings)

Sentence embeddings:
tensor([0.3710, 0.9909, 2.4635, 1.4161, 0.8924, 0.4561], device='cuda:0')


In [None]:
# numpy implementation of argmax
from numpy import argmax

sentence_embeddings = sentence_embeddings.cpu()

# get argmax
result = argmax(sentence_embeddings)
print('arg max of %s: %d' % (sentence_embeddings, result))

arg max of tensor([0.3710, 0.9909, 2.4635, 1.4161, 0.8924, 0.4561]): 2


## Attention analysis

In [None]:
# Get attention heatmaps
import matplotlib
from IPython.core.display import display, HTML
def colorize(words, color_array):
    cmap=matplotlib.cm.Reds
    template = '<span class="barcode"; style="color: black; background-color: {}">{}</span>'
    colored_string = ''
    for word, color in zip(words, color_array):
        color = matplotlib.colors.rgb2hex(cmap(color)[:3])
        colored_string += template.format(color, '&nbsp' + word + '&nbsp')
    return colored_string

#### What does the CLS token attend to?




In [None]:
# Max Pooling for all tokens in sentences and argmax

# Select some sentences randomly
sent_index = [0,1,2]

for s in sent_index:
  print("*" * 100)
  # Get the sentence's words
  tokens = test_inputs[s]
  # For each layer...
  for l in range(12):
    print("\nLayer", l+1)
    attention = np.squeeze(test_attentions[s][l].detach().cpu().numpy(), axis=0)
    # and for each head
    for h, head in enumerate(attention):
      print("Head", h+1)
      # Get the attention for the cls token
      encoded_tokens = bert_tokenizer(tokens, truncation=True, padding=True, max_length=128, return_tensors='pt')
      encoded_tokens = encoded_tokens.to(device)
      with torch.no_grad():
        model_output1 = model_e(**encoded_tokens)
        tokens_embeddings = max_pooling(model_output1, encoded_tokens['attention_mask'])
        tokens_embeddings = tokens_embeddings.cpu()
        cls_attentions = head[0]
        display(HTML(colorize(tokens, cls_attentions)))
        print("Tokens embeddings:")
        print(tokens_embeddings)
        arg = argmax(tokens_embeddings)
        print('arg max of %s: %d' % (tokens_embeddings, arg))

Output hidden; open in https://colab.research.google.com to view.

In [None]:
# Pooling First token [CLS] for each sentence - argmax

# Select some sentences randomly
sent_index = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10]

for s in sent_index:
  print("*" * 100)
  # Get the sentence's words
  tokens = test_inputs[s]
  # For each layer...
  for l in range(12):
    print("\nLayer", l+1)
    attention = np.squeeze(test_attentions[s][l].detach().cpu().numpy(), axis=0)
    # and for each head
    #for h, head in enumerate(attention):
    #print("Head", h+1)
    # Get the attention for the cls token
    encoded_tokens = bert_tokenizer(tokens, padding=True, truncation=True, max_length=128, return_tensors='pt')
    encoded_tokens = encoded_tokens.to(device)
    with torch.no_grad():
      model_output1 = model_e(**encoded_tokens)
      tokens_embeddings = model_output1[0][:,0]
      tokens_embeddings = tokens_embeddings.cpu()
      cls_attentions = head[0]
      display(HTML(colorize(tokens, cls_attentions)))
      print("Tokens embeddings:")
      print(tokens_embeddings)
      arg = argmax(tokens_embeddings)
      print('arg max of %s: %d' % (tokens_embeddings, arg))

****************************************************************************************************

Layer 1


Tokens embeddings:
tensor([-0.3412, -0.4396, -0.3814, -0.4744, -0.3731, -0.3842, -0.4739, -0.4287,
        -0.3741, -0.4744, -0.2325, -0.2076, -0.4744, -0.1333, -0.1959, -0.4651,
        -0.4356, -0.4422, -0.2983, -0.2218, -0.4224, -0.3163, -0.2839, -0.3842,
        -0.2896, -0.4486, -0.1518, -0.3528, -0.4505, -0.3925, -0.3167, -0.4224,
        -0.3598, -0.3842, -0.4367, -0.0990, -0.3438, -0.6103, -0.4486, -0.1518,
        -0.2812, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729])
arg max of tensor([-0.3412, -0.4396, -0.3814, -0.4744, -0.3731, -0.3842, -0.4739, -0.4287,
        -0.3741, -0.4744, -0.2325, -

Tokens embeddings:
tensor([-0.3412, -0.4396, -0.3814, -0.4744, -0.3731, -0.3842, -0.4739, -0.4287,
        -0.3741, -0.4744, -0.2325, -0.2076, -0.4744, -0.1333, -0.1959, -0.4651,
        -0.4356, -0.4422, -0.2983, -0.2218, -0.4224, -0.3163, -0.2839, -0.3842,
        -0.2896, -0.4486, -0.1518, -0.3528, -0.4505, -0.3925, -0.3167, -0.4224,
        -0.3598, -0.3842, -0.4367, -0.0990, -0.3438, -0.6103, -0.4486, -0.1518,
        -0.2812, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729])
arg max of tensor([-0.3412, -0.4396, -0.3814, -0.4744, -0.3731, -0.3842, -0.4739, -0.4287,
        -0.3741, -0.4744, -0.2325, -

Tokens embeddings:
tensor([-0.3412, -0.4396, -0.3814, -0.4744, -0.3731, -0.3842, -0.4739, -0.4287,
        -0.3741, -0.4744, -0.2325, -0.2076, -0.4744, -0.1333, -0.1959, -0.4651,
        -0.4356, -0.4422, -0.2983, -0.2218, -0.4224, -0.3163, -0.2839, -0.3842,
        -0.2896, -0.4486, -0.1518, -0.3528, -0.4505, -0.3925, -0.3167, -0.4224,
        -0.3598, -0.3842, -0.4367, -0.0990, -0.3438, -0.6103, -0.4486, -0.1518,
        -0.2812, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729])
arg max of tensor([-0.3412, -0.4396, -0.3814, -0.4744, -0.3731, -0.3842, -0.4739, -0.4287,
        -0.3741, -0.4744, -0.2325, -

Tokens embeddings:
tensor([-0.3412, -0.4396, -0.3814, -0.4744, -0.3731, -0.3842, -0.4739, -0.4287,
        -0.3741, -0.4744, -0.2325, -0.2076, -0.4744, -0.1333, -0.1959, -0.4651,
        -0.4356, -0.4422, -0.2983, -0.2218, -0.4224, -0.3163, -0.2839, -0.3842,
        -0.2896, -0.4486, -0.1518, -0.3528, -0.4505, -0.3925, -0.3167, -0.4224,
        -0.3598, -0.3842, -0.4367, -0.0990, -0.3438, -0.6103, -0.4486, -0.1518,
        -0.2812, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729])
arg max of tensor([-0.3412, -0.4396, -0.3814, -0.4744, -0.3731, -0.3842, -0.4739, -0.4287,
        -0.3741, -0.4744, -0.2325, -

Tokens embeddings:
tensor([-0.3412, -0.4396, -0.3814, -0.4744, -0.3731, -0.3842, -0.4739, -0.4287,
        -0.3741, -0.4744, -0.2325, -0.2076, -0.4744, -0.1333, -0.1959, -0.4651,
        -0.4356, -0.4422, -0.2983, -0.2218, -0.4224, -0.3163, -0.2839, -0.3842,
        -0.2896, -0.4486, -0.1518, -0.3528, -0.4505, -0.3925, -0.3167, -0.4224,
        -0.3598, -0.3842, -0.4367, -0.0990, -0.3438, -0.6103, -0.4486, -0.1518,
        -0.2812, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729])
arg max of tensor([-0.3412, -0.4396, -0.3814, -0.4744, -0.3731, -0.3842, -0.4739, -0.4287,
        -0.3741, -0.4744, -0.2325, -

Tokens embeddings:
tensor([-0.3412, -0.4396, -0.3814, -0.4744, -0.3731, -0.3842, -0.4739, -0.4287,
        -0.3741, -0.4744, -0.2325, -0.2076, -0.4744, -0.1333, -0.1959, -0.4651,
        -0.4356, -0.4422, -0.2983, -0.2218, -0.4224, -0.3163, -0.2839, -0.3842,
        -0.2896, -0.4486, -0.1518, -0.3528, -0.4505, -0.3925, -0.3167, -0.4224,
        -0.3598, -0.3842, -0.4367, -0.0990, -0.3438, -0.6103, -0.4486, -0.1518,
        -0.2812, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729])
arg max of tensor([-0.3412, -0.4396, -0.3814, -0.4744, -0.3731, -0.3842, -0.4739, -0.4287,
        -0.3741, -0.4744, -0.2325, -

Tokens embeddings:
tensor([-0.3412, -0.4396, -0.3814, -0.4744, -0.3731, -0.3842, -0.4739, -0.4287,
        -0.3741, -0.4744, -0.2325, -0.2076, -0.4744, -0.1333, -0.1959, -0.4651,
        -0.4356, -0.4422, -0.2983, -0.2218, -0.4224, -0.3163, -0.2839, -0.3842,
        -0.2896, -0.4486, -0.1518, -0.3528, -0.4505, -0.3925, -0.3167, -0.4224,
        -0.3598, -0.3842, -0.4367, -0.0990, -0.3438, -0.6103, -0.4486, -0.1518,
        -0.2812, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729])
arg max of tensor([-0.3412, -0.4396, -0.3814, -0.4744, -0.3731, -0.3842, -0.4739, -0.4287,
        -0.3741, -0.4744, -0.2325, -

Tokens embeddings:
tensor([-0.3412, -0.4396, -0.3814, -0.4744, -0.3731, -0.3842, -0.4739, -0.4287,
        -0.3741, -0.4744, -0.2325, -0.2076, -0.4744, -0.1333, -0.1959, -0.4651,
        -0.4356, -0.4422, -0.2983, -0.2218, -0.4224, -0.3163, -0.2839, -0.3842,
        -0.2896, -0.4486, -0.1518, -0.3528, -0.4505, -0.3925, -0.3167, -0.4224,
        -0.3598, -0.3842, -0.4367, -0.0990, -0.3438, -0.6103, -0.4486, -0.1518,
        -0.2812, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729])
arg max of tensor([-0.3412, -0.4396, -0.3814, -0.4744, -0.3731, -0.3842, -0.4739, -0.4287,
        -0.3741, -0.4744, -0.2325, -

Tokens embeddings:
tensor([-0.3412, -0.4396, -0.3814, -0.4744, -0.3731, -0.3842, -0.4739, -0.4287,
        -0.3741, -0.4744, -0.2325, -0.2076, -0.4744, -0.1333, -0.1959, -0.4651,
        -0.4356, -0.4422, -0.2983, -0.2218, -0.4224, -0.3163, -0.2839, -0.3842,
        -0.2896, -0.4486, -0.1518, -0.3528, -0.4505, -0.3925, -0.3167, -0.4224,
        -0.3598, -0.3842, -0.4367, -0.0990, -0.3438, -0.6103, -0.4486, -0.1518,
        -0.2812, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729])
arg max of tensor([-0.3412, -0.4396, -0.3814, -0.4744, -0.3731, -0.3842, -0.4739, -0.4287,
        -0.3741, -0.4744, -0.2325, -

Tokens embeddings:
tensor([-0.3412, -0.4396, -0.3814, -0.4744, -0.3731, -0.3842, -0.4739, -0.4287,
        -0.3741, -0.4744, -0.2325, -0.2076, -0.4744, -0.1333, -0.1959, -0.4651,
        -0.4356, -0.4422, -0.2983, -0.2218, -0.4224, -0.3163, -0.2839, -0.3842,
        -0.2896, -0.4486, -0.1518, -0.3528, -0.4505, -0.3925, -0.3167, -0.4224,
        -0.3598, -0.3842, -0.4367, -0.0990, -0.3438, -0.6103, -0.4486, -0.1518,
        -0.2812, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729])
arg max of tensor([-0.3412, -0.4396, -0.3814, -0.4744, -0.3731, -0.3842, -0.4739, -0.4287,
        -0.3741, -0.4744, -0.2325, -

Tokens embeddings:
tensor([-0.3412, -0.4396, -0.3814, -0.4744, -0.3731, -0.3842, -0.4739, -0.4287,
        -0.3741, -0.4744, -0.2325, -0.2076, -0.4744, -0.1333, -0.1959, -0.4651,
        -0.4356, -0.4422, -0.2983, -0.2218, -0.4224, -0.3163, -0.2839, -0.3842,
        -0.2896, -0.4486, -0.1518, -0.3528, -0.4505, -0.3925, -0.3167, -0.4224,
        -0.3598, -0.3842, -0.4367, -0.0990, -0.3438, -0.6103, -0.4486, -0.1518,
        -0.2812, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729])
arg max of tensor([-0.3412, -0.4396, -0.3814, -0.4744, -0.3731, -0.3842, -0.4739, -0.4287,
        -0.3741, -0.4744, -0.2325, -

Tokens embeddings:
tensor([-0.3412, -0.4396, -0.3814, -0.4744, -0.3731, -0.3842, -0.4739, -0.4287,
        -0.3741, -0.4744, -0.2325, -0.2076, -0.4744, -0.1333, -0.1959, -0.4651,
        -0.4356, -0.4422, -0.2983, -0.2218, -0.4224, -0.3163, -0.2839, -0.3842,
        -0.2896, -0.4486, -0.1518, -0.3528, -0.4505, -0.3925, -0.3167, -0.4224,
        -0.3598, -0.3842, -0.4367, -0.0990, -0.3438, -0.6103, -0.4486, -0.1518,
        -0.2812, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729])
arg max of tensor([-0.3412, -0.4396, -0.3814, -0.4744, -0.3731, -0.3842, -0.4739, -0.4287,
        -0.3741, -0.4744, -0.2325, -

Tokens embeddings:
tensor([-0.3412, -0.4166, -0.4848, -0.0990, -0.3485, -0.3827, -0.2976, -0.2076,
        -0.3306, -0.4062, -0.3980, -0.2076, -0.3742, -0.4006, -0.3387, -0.4357,
        -0.3638, -0.3761, -0.2557, -0.2921, -0.4224, -0.2026, -0.3842, -0.3275,
        -0.2723, -0.5196, -0.4744, -0.4000, -0.4309, -0.4686, -0.1974, -0.2854,
        -0.3631, -0.4041, -0.4113, -0.4443, -0.0990, -0.4573, -0.3504, -0.2076,
        -0.3292, -0.4309, -0.2502, -0.2076, -0.4477, -0.1645, -0.3227, -0.2812,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729])
arg max of tensor([-0.3412, -0.4166, -0.4848, -0.0990, -0.3485, -0.3827, -0.2976, -0.2076,
        -0.3306, -0.4062, -0.3980, -

Tokens embeddings:
tensor([-0.3412, -0.4166, -0.4848, -0.0990, -0.3485, -0.3827, -0.2976, -0.2076,
        -0.3306, -0.4062, -0.3980, -0.2076, -0.3742, -0.4006, -0.3387, -0.4357,
        -0.3638, -0.3761, -0.2557, -0.2921, -0.4224, -0.2026, -0.3842, -0.3275,
        -0.2723, -0.5196, -0.4744, -0.4000, -0.4309, -0.4686, -0.1974, -0.2854,
        -0.3631, -0.4041, -0.4113, -0.4443, -0.0990, -0.4573, -0.3504, -0.2076,
        -0.3292, -0.4309, -0.2502, -0.2076, -0.4477, -0.1645, -0.3227, -0.2812,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729])
arg max of tensor([-0.3412, -0.4166, -0.4848, -0.0990, -0.3485, -0.3827, -0.2976, -0.2076,
        -0.3306, -0.4062, -0.3980, -

Tokens embeddings:
tensor([-0.3412, -0.4166, -0.4848, -0.0990, -0.3485, -0.3827, -0.2976, -0.2076,
        -0.3306, -0.4062, -0.3980, -0.2076, -0.3742, -0.4006, -0.3387, -0.4357,
        -0.3638, -0.3761, -0.2557, -0.2921, -0.4224, -0.2026, -0.3842, -0.3275,
        -0.2723, -0.5196, -0.4744, -0.4000, -0.4309, -0.4686, -0.1974, -0.2854,
        -0.3631, -0.4041, -0.4113, -0.4443, -0.0990, -0.4573, -0.3504, -0.2076,
        -0.3292, -0.4309, -0.2502, -0.2076, -0.4477, -0.1645, -0.3227, -0.2812,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729])
arg max of tensor([-0.3412, -0.4166, -0.4848, -0.0990, -0.3485, -0.3827, -0.2976, -0.2076,
        -0.3306, -0.4062, -0.3980, -

Tokens embeddings:
tensor([-0.3412, -0.4166, -0.4848, -0.0990, -0.3485, -0.3827, -0.2976, -0.2076,
        -0.3306, -0.4062, -0.3980, -0.2076, -0.3742, -0.4006, -0.3387, -0.4357,
        -0.3638, -0.3761, -0.2557, -0.2921, -0.4224, -0.2026, -0.3842, -0.3275,
        -0.2723, -0.5196, -0.4744, -0.4000, -0.4309, -0.4686, -0.1974, -0.2854,
        -0.3631, -0.4041, -0.4113, -0.4443, -0.0990, -0.4573, -0.3504, -0.2076,
        -0.3292, -0.4309, -0.2502, -0.2076, -0.4477, -0.1645, -0.3227, -0.2812,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729])
arg max of tensor([-0.3412, -0.4166, -0.4848, -0.0990, -0.3485, -0.3827, -0.2976, -0.2076,
        -0.3306, -0.4062, -0.3980, -

Tokens embeddings:
tensor([-0.3412, -0.4166, -0.4848, -0.0990, -0.3485, -0.3827, -0.2976, -0.2076,
        -0.3306, -0.4062, -0.3980, -0.2076, -0.3742, -0.4006, -0.3387, -0.4357,
        -0.3638, -0.3761, -0.2557, -0.2921, -0.4224, -0.2026, -0.3842, -0.3275,
        -0.2723, -0.5196, -0.4744, -0.4000, -0.4309, -0.4686, -0.1974, -0.2854,
        -0.3631, -0.4041, -0.4113, -0.4443, -0.0990, -0.4573, -0.3504, -0.2076,
        -0.3292, -0.4309, -0.2502, -0.2076, -0.4477, -0.1645, -0.3227, -0.2812,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729])
arg max of tensor([-0.3412, -0.4166, -0.4848, -0.0990, -0.3485, -0.3827, -0.2976, -0.2076,
        -0.3306, -0.4062, -0.3980, -

Tokens embeddings:
tensor([-0.3412, -0.4166, -0.4848, -0.0990, -0.3485, -0.3827, -0.2976, -0.2076,
        -0.3306, -0.4062, -0.3980, -0.2076, -0.3742, -0.4006, -0.3387, -0.4357,
        -0.3638, -0.3761, -0.2557, -0.2921, -0.4224, -0.2026, -0.3842, -0.3275,
        -0.2723, -0.5196, -0.4744, -0.4000, -0.4309, -0.4686, -0.1974, -0.2854,
        -0.3631, -0.4041, -0.4113, -0.4443, -0.0990, -0.4573, -0.3504, -0.2076,
        -0.3292, -0.4309, -0.2502, -0.2076, -0.4477, -0.1645, -0.3227, -0.2812,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729])
arg max of tensor([-0.3412, -0.4166, -0.4848, -0.0990, -0.3485, -0.3827, -0.2976, -0.2076,
        -0.3306, -0.4062, -0.3980, -

Tokens embeddings:
tensor([-0.3412, -0.4166, -0.4848, -0.0990, -0.3485, -0.3827, -0.2976, -0.2076,
        -0.3306, -0.4062, -0.3980, -0.2076, -0.3742, -0.4006, -0.3387, -0.4357,
        -0.3638, -0.3761, -0.2557, -0.2921, -0.4224, -0.2026, -0.3842, -0.3275,
        -0.2723, -0.5196, -0.4744, -0.4000, -0.4309, -0.4686, -0.1974, -0.2854,
        -0.3631, -0.4041, -0.4113, -0.4443, -0.0990, -0.4573, -0.3504, -0.2076,
        -0.3292, -0.4309, -0.2502, -0.2076, -0.4477, -0.1645, -0.3227, -0.2812,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729])
arg max of tensor([-0.3412, -0.4166, -0.4848, -0.0990, -0.3485, -0.3827, -0.2976, -0.2076,
        -0.3306, -0.4062, -0.3980, -

Tokens embeddings:
tensor([-0.3412, -0.4166, -0.4848, -0.0990, -0.3485, -0.3827, -0.2976, -0.2076,
        -0.3306, -0.4062, -0.3980, -0.2076, -0.3742, -0.4006, -0.3387, -0.4357,
        -0.3638, -0.3761, -0.2557, -0.2921, -0.4224, -0.2026, -0.3842, -0.3275,
        -0.2723, -0.5196, -0.4744, -0.4000, -0.4309, -0.4686, -0.1974, -0.2854,
        -0.3631, -0.4041, -0.4113, -0.4443, -0.0990, -0.4573, -0.3504, -0.2076,
        -0.3292, -0.4309, -0.2502, -0.2076, -0.4477, -0.1645, -0.3227, -0.2812,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729])
arg max of tensor([-0.3412, -0.4166, -0.4848, -0.0990, -0.3485, -0.3827, -0.2976, -0.2076,
        -0.3306, -0.4062, -0.3980, -

Tokens embeddings:
tensor([-0.3412, -0.4166, -0.4848, -0.0990, -0.3485, -0.3827, -0.2976, -0.2076,
        -0.3306, -0.4062, -0.3980, -0.2076, -0.3742, -0.4006, -0.3387, -0.4357,
        -0.3638, -0.3761, -0.2557, -0.2921, -0.4224, -0.2026, -0.3842, -0.3275,
        -0.2723, -0.5196, -0.4744, -0.4000, -0.4309, -0.4686, -0.1974, -0.2854,
        -0.3631, -0.4041, -0.4113, -0.4443, -0.0990, -0.4573, -0.3504, -0.2076,
        -0.3292, -0.4309, -0.2502, -0.2076, -0.4477, -0.1645, -0.3227, -0.2812,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729])
arg max of tensor([-0.3412, -0.4166, -0.4848, -0.0990, -0.3485, -0.3827, -0.2976, -0.2076,
        -0.3306, -0.4062, -0.3980, -

Tokens embeddings:
tensor([-0.3412, -0.4166, -0.4848, -0.0990, -0.3485, -0.3827, -0.2976, -0.2076,
        -0.3306, -0.4062, -0.3980, -0.2076, -0.3742, -0.4006, -0.3387, -0.4357,
        -0.3638, -0.3761, -0.2557, -0.2921, -0.4224, -0.2026, -0.3842, -0.3275,
        -0.2723, -0.5196, -0.4744, -0.4000, -0.4309, -0.4686, -0.1974, -0.2854,
        -0.3631, -0.4041, -0.4113, -0.4443, -0.0990, -0.4573, -0.3504, -0.2076,
        -0.3292, -0.4309, -0.2502, -0.2076, -0.4477, -0.1645, -0.3227, -0.2812,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729])
arg max of tensor([-0.3412, -0.4166, -0.4848, -0.0990, -0.3485, -0.3827, -0.2976, -0.2076,
        -0.3306, -0.4062, -0.3980, -

Tokens embeddings:
tensor([-0.3412, -0.4166, -0.4848, -0.0990, -0.3485, -0.3827, -0.2976, -0.2076,
        -0.3306, -0.4062, -0.3980, -0.2076, -0.3742, -0.4006, -0.3387, -0.4357,
        -0.3638, -0.3761, -0.2557, -0.2921, -0.4224, -0.2026, -0.3842, -0.3275,
        -0.2723, -0.5196, -0.4744, -0.4000, -0.4309, -0.4686, -0.1974, -0.2854,
        -0.3631, -0.4041, -0.4113, -0.4443, -0.0990, -0.4573, -0.3504, -0.2076,
        -0.3292, -0.4309, -0.2502, -0.2076, -0.4477, -0.1645, -0.3227, -0.2812,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729])
arg max of tensor([-0.3412, -0.4166, -0.4848, -0.0990, -0.3485, -0.3827, -0.2976, -0.2076,
        -0.3306, -0.4062, -0.3980, -

Tokens embeddings:
tensor([-0.3412, -0.4166, -0.4848, -0.0990, -0.3485, -0.3827, -0.2976, -0.2076,
        -0.3306, -0.4062, -0.3980, -0.2076, -0.3742, -0.4006, -0.3387, -0.4357,
        -0.3638, -0.3761, -0.2557, -0.2921, -0.4224, -0.2026, -0.3842, -0.3275,
        -0.2723, -0.5196, -0.4744, -0.4000, -0.4309, -0.4686, -0.1974, -0.2854,
        -0.3631, -0.4041, -0.4113, -0.4443, -0.0990, -0.4573, -0.3504, -0.2076,
        -0.3292, -0.4309, -0.2502, -0.2076, -0.4477, -0.1645, -0.3227, -0.2812,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729])
arg max of tensor([-0.3412, -0.4166, -0.4848, -0.0990, -0.3485, -0.3827, -0.2976, -0.2076,
        -0.3306, -0.4062, -0.3980, -

Tokens embeddings:
tensor([-0.3412, -0.3791, -0.0990, -0.2783, -0.2107, -0.5139, -0.3328, -0.4224,
        -0.5278, -0.3174, -0.3842, -0.4893, -0.2076, -0.4758, -0.4744, -0.4182,
        -0.2835, -0.3292, -0.5149, -0.4550, -0.3788, -0.4119, -0.4269, -0.2972,
        -0.4744, -0.3429, -0.5873, -0.1870, -0.4217, -0.4538, -0.4744, -0.4846,
        -0.3021, -0.3842, -0.2932, -0.4579, -0.4744, -0.4846, -0.3980, -0.4102,
        -0.3686, -0.2812, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729])
arg max of tensor([-0.3412, -0.3791, -0.0990, -0.2783, -0.2107, -0.5139, -0.3328, -0.4224,
        -0.5278, -0.3174, -0.3842, -

Tokens embeddings:
tensor([-0.3412, -0.3791, -0.0990, -0.2783, -0.2107, -0.5139, -0.3328, -0.4224,
        -0.5278, -0.3174, -0.3842, -0.4893, -0.2076, -0.4758, -0.4744, -0.4182,
        -0.2835, -0.3292, -0.5149, -0.4550, -0.3788, -0.4119, -0.4269, -0.2972,
        -0.4744, -0.3429, -0.5873, -0.1870, -0.4217, -0.4538, -0.4744, -0.4846,
        -0.3021, -0.3842, -0.2932, -0.4579, -0.4744, -0.4846, -0.3980, -0.4102,
        -0.3686, -0.2812, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729])
arg max of tensor([-0.3412, -0.3791, -0.0990, -0.2783, -0.2107, -0.5139, -0.3328, -0.4224,
        -0.5278, -0.3174, -0.3842, -

Tokens embeddings:
tensor([-0.3412, -0.3791, -0.0990, -0.2783, -0.2107, -0.5139, -0.3328, -0.4224,
        -0.5278, -0.3174, -0.3842, -0.4893, -0.2076, -0.4758, -0.4744, -0.4182,
        -0.2835, -0.3292, -0.5149, -0.4550, -0.3788, -0.4119, -0.4269, -0.2972,
        -0.4744, -0.3429, -0.5873, -0.1870, -0.4217, -0.4538, -0.4744, -0.4846,
        -0.3021, -0.3842, -0.2932, -0.4579, -0.4744, -0.4846, -0.3980, -0.4102,
        -0.3686, -0.2812, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729])
arg max of tensor([-0.3412, -0.3791, -0.0990, -0.2783, -0.2107, -0.5139, -0.3328, -0.4224,
        -0.5278, -0.3174, -0.3842, -

Tokens embeddings:
tensor([-0.3412, -0.3791, -0.0990, -0.2783, -0.2107, -0.5139, -0.3328, -0.4224,
        -0.5278, -0.3174, -0.3842, -0.4893, -0.2076, -0.4758, -0.4744, -0.4182,
        -0.2835, -0.3292, -0.5149, -0.4550, -0.3788, -0.4119, -0.4269, -0.2972,
        -0.4744, -0.3429, -0.5873, -0.1870, -0.4217, -0.4538, -0.4744, -0.4846,
        -0.3021, -0.3842, -0.2932, -0.4579, -0.4744, -0.4846, -0.3980, -0.4102,
        -0.3686, -0.2812, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729])
arg max of tensor([-0.3412, -0.3791, -0.0990, -0.2783, -0.2107, -0.5139, -0.3328, -0.4224,
        -0.5278, -0.3174, -0.3842, -

Tokens embeddings:
tensor([-0.3412, -0.3791, -0.0990, -0.2783, -0.2107, -0.5139, -0.3328, -0.4224,
        -0.5278, -0.3174, -0.3842, -0.4893, -0.2076, -0.4758, -0.4744, -0.4182,
        -0.2835, -0.3292, -0.5149, -0.4550, -0.3788, -0.4119, -0.4269, -0.2972,
        -0.4744, -0.3429, -0.5873, -0.1870, -0.4217, -0.4538, -0.4744, -0.4846,
        -0.3021, -0.3842, -0.2932, -0.4579, -0.4744, -0.4846, -0.3980, -0.4102,
        -0.3686, -0.2812, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729])
arg max of tensor([-0.3412, -0.3791, -0.0990, -0.2783, -0.2107, -0.5139, -0.3328, -0.4224,
        -0.5278, -0.3174, -0.3842, -

Tokens embeddings:
tensor([-0.3412, -0.3791, -0.0990, -0.2783, -0.2107, -0.5139, -0.3328, -0.4224,
        -0.5278, -0.3174, -0.3842, -0.4893, -0.2076, -0.4758, -0.4744, -0.4182,
        -0.2835, -0.3292, -0.5149, -0.4550, -0.3788, -0.4119, -0.4269, -0.2972,
        -0.4744, -0.3429, -0.5873, -0.1870, -0.4217, -0.4538, -0.4744, -0.4846,
        -0.3021, -0.3842, -0.2932, -0.4579, -0.4744, -0.4846, -0.3980, -0.4102,
        -0.3686, -0.2812, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729])
arg max of tensor([-0.3412, -0.3791, -0.0990, -0.2783, -0.2107, -0.5139, -0.3328, -0.4224,
        -0.5278, -0.3174, -0.3842, -

Tokens embeddings:
tensor([-0.3412, -0.3791, -0.0990, -0.2783, -0.2107, -0.5139, -0.3328, -0.4224,
        -0.5278, -0.3174, -0.3842, -0.4893, -0.2076, -0.4758, -0.4744, -0.4182,
        -0.2835, -0.3292, -0.5149, -0.4550, -0.3788, -0.4119, -0.4269, -0.2972,
        -0.4744, -0.3429, -0.5873, -0.1870, -0.4217, -0.4538, -0.4744, -0.4846,
        -0.3021, -0.3842, -0.2932, -0.4579, -0.4744, -0.4846, -0.3980, -0.4102,
        -0.3686, -0.2812, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729])
arg max of tensor([-0.3412, -0.3791, -0.0990, -0.2783, -0.2107, -0.5139, -0.3328, -0.4224,
        -0.5278, -0.3174, -0.3842, -

Tokens embeddings:
tensor([-0.3412, -0.3791, -0.0990, -0.2783, -0.2107, -0.5139, -0.3328, -0.4224,
        -0.5278, -0.3174, -0.3842, -0.4893, -0.2076, -0.4758, -0.4744, -0.4182,
        -0.2835, -0.3292, -0.5149, -0.4550, -0.3788, -0.4119, -0.4269, -0.2972,
        -0.4744, -0.3429, -0.5873, -0.1870, -0.4217, -0.4538, -0.4744, -0.4846,
        -0.3021, -0.3842, -0.2932, -0.4579, -0.4744, -0.4846, -0.3980, -0.4102,
        -0.3686, -0.2812, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729])
arg max of tensor([-0.3412, -0.3791, -0.0990, -0.2783, -0.2107, -0.5139, -0.3328, -0.4224,
        -0.5278, -0.3174, -0.3842, -

Tokens embeddings:
tensor([-0.3412, -0.3791, -0.0990, -0.2783, -0.2107, -0.5139, -0.3328, -0.4224,
        -0.5278, -0.3174, -0.3842, -0.4893, -0.2076, -0.4758, -0.4744, -0.4182,
        -0.2835, -0.3292, -0.5149, -0.4550, -0.3788, -0.4119, -0.4269, -0.2972,
        -0.4744, -0.3429, -0.5873, -0.1870, -0.4217, -0.4538, -0.4744, -0.4846,
        -0.3021, -0.3842, -0.2932, -0.4579, -0.4744, -0.4846, -0.3980, -0.4102,
        -0.3686, -0.2812, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729])
arg max of tensor([-0.3412, -0.3791, -0.0990, -0.2783, -0.2107, -0.5139, -0.3328, -0.4224,
        -0.5278, -0.3174, -0.3842, -

Tokens embeddings:
tensor([-0.3412, -0.3791, -0.0990, -0.2783, -0.2107, -0.5139, -0.3328, -0.4224,
        -0.5278, -0.3174, -0.3842, -0.4893, -0.2076, -0.4758, -0.4744, -0.4182,
        -0.2835, -0.3292, -0.5149, -0.4550, -0.3788, -0.4119, -0.4269, -0.2972,
        -0.4744, -0.3429, -0.5873, -0.1870, -0.4217, -0.4538, -0.4744, -0.4846,
        -0.3021, -0.3842, -0.2932, -0.4579, -0.4744, -0.4846, -0.3980, -0.4102,
        -0.3686, -0.2812, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729])
arg max of tensor([-0.3412, -0.3791, -0.0990, -0.2783, -0.2107, -0.5139, -0.3328, -0.4224,
        -0.5278, -0.3174, -0.3842, -

Tokens embeddings:
tensor([-0.3412, -0.3791, -0.0990, -0.2783, -0.2107, -0.5139, -0.3328, -0.4224,
        -0.5278, -0.3174, -0.3842, -0.4893, -0.2076, -0.4758, -0.4744, -0.4182,
        -0.2835, -0.3292, -0.5149, -0.4550, -0.3788, -0.4119, -0.4269, -0.2972,
        -0.4744, -0.3429, -0.5873, -0.1870, -0.4217, -0.4538, -0.4744, -0.4846,
        -0.3021, -0.3842, -0.2932, -0.4579, -0.4744, -0.4846, -0.3980, -0.4102,
        -0.3686, -0.2812, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729])
arg max of tensor([-0.3412, -0.3791, -0.0990, -0.2783, -0.2107, -0.5139, -0.3328, -0.4224,
        -0.5278, -0.3174, -0.3842, -

Tokens embeddings:
tensor([-0.3412, -0.3791, -0.0990, -0.2783, -0.2107, -0.5139, -0.3328, -0.4224,
        -0.5278, -0.3174, -0.3842, -0.4893, -0.2076, -0.4758, -0.4744, -0.4182,
        -0.2835, -0.3292, -0.5149, -0.4550, -0.3788, -0.4119, -0.4269, -0.2972,
        -0.4744, -0.3429, -0.5873, -0.1870, -0.4217, -0.4538, -0.4744, -0.4846,
        -0.3021, -0.3842, -0.2932, -0.4579, -0.4744, -0.4846, -0.3980, -0.4102,
        -0.3686, -0.2812, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729])
arg max of tensor([-0.3412, -0.3791, -0.0990, -0.2783, -0.2107, -0.5139, -0.3328, -0.4224,
        -0.5278, -0.3174, -0.3842, -

Tokens embeddings:
tensor([-0.3412, -0.2057, -0.4733, -0.3218, -0.4119, -0.3013, -0.4573, -0.2091,
        -0.1882, -0.4417, -0.2076, -0.4316, -0.4224, -0.2540, -0.0990, -0.4338,
        -0.0990, -0.4043, -0.2513, -0.5111, -0.3913, -0.1580, -0.3650, -0.4407,
        -0.3730, -0.2812, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729])
arg max of tensor([-0.3412, -0.2057, -0.4733, -0.3218, -0.4119, -0.3013, -0.4573, -0.2091,
        -0.1882, -0.4417, -0.2076, -

Tokens embeddings:
tensor([-0.3412, -0.2057, -0.4733, -0.3218, -0.4119, -0.3013, -0.4573, -0.2091,
        -0.1882, -0.4417, -0.2076, -0.4316, -0.4224, -0.2540, -0.0990, -0.4338,
        -0.0990, -0.4043, -0.2513, -0.5111, -0.3913, -0.1580, -0.3650, -0.4407,
        -0.3730, -0.2812, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729])
arg max of tensor([-0.3412, -0.2057, -0.4733, -0.3218, -0.4119, -0.3013, -0.4573, -0.2091,
        -0.1882, -0.4417, -0.2076, -

Tokens embeddings:
tensor([-0.3412, -0.2057, -0.4733, -0.3218, -0.4119, -0.3013, -0.4573, -0.2091,
        -0.1882, -0.4417, -0.2076, -0.4316, -0.4224, -0.2540, -0.0990, -0.4338,
        -0.0990, -0.4043, -0.2513, -0.5111, -0.3913, -0.1580, -0.3650, -0.4407,
        -0.3730, -0.2812, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729])
arg max of tensor([-0.3412, -0.2057, -0.4733, -0.3218, -0.4119, -0.3013, -0.4573, -0.2091,
        -0.1882, -0.4417, -0.2076, -

Tokens embeddings:
tensor([-0.3412, -0.2057, -0.4733, -0.3218, -0.4119, -0.3013, -0.4573, -0.2091,
        -0.1882, -0.4417, -0.2076, -0.4316, -0.4224, -0.2540, -0.0990, -0.4338,
        -0.0990, -0.4043, -0.2513, -0.5111, -0.3913, -0.1580, -0.3650, -0.4407,
        -0.3730, -0.2812, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729])
arg max of tensor([-0.3412, -0.2057, -0.4733, -0.3218, -0.4119, -0.3013, -0.4573, -0.2091,
        -0.1882, -0.4417, -0.2076, -

Tokens embeddings:
tensor([-0.3412, -0.2057, -0.4733, -0.3218, -0.4119, -0.3013, -0.4573, -0.2091,
        -0.1882, -0.4417, -0.2076, -0.4316, -0.4224, -0.2540, -0.0990, -0.4338,
        -0.0990, -0.4043, -0.2513, -0.5111, -0.3913, -0.1580, -0.3650, -0.4407,
        -0.3730, -0.2812, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729])
arg max of tensor([-0.3412, -0.2057, -0.4733, -0.3218, -0.4119, -0.3013, -0.4573, -0.2091,
        -0.1882, -0.4417, -0.2076, -

Tokens embeddings:
tensor([-0.3412, -0.2057, -0.4733, -0.3218, -0.4119, -0.3013, -0.4573, -0.2091,
        -0.1882, -0.4417, -0.2076, -0.4316, -0.4224, -0.2540, -0.0990, -0.4338,
        -0.0990, -0.4043, -0.2513, -0.5111, -0.3913, -0.1580, -0.3650, -0.4407,
        -0.3730, -0.2812, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729])
arg max of tensor([-0.3412, -0.2057, -0.4733, -0.3218, -0.4119, -0.3013, -0.4573, -0.2091,
        -0.1882, -0.4417, -0.2076, -

Tokens embeddings:
tensor([-0.3412, -0.2057, -0.4733, -0.3218, -0.4119, -0.3013, -0.4573, -0.2091,
        -0.1882, -0.4417, -0.2076, -0.4316, -0.4224, -0.2540, -0.0990, -0.4338,
        -0.0990, -0.4043, -0.2513, -0.5111, -0.3913, -0.1580, -0.3650, -0.4407,
        -0.3730, -0.2812, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729])
arg max of tensor([-0.3412, -0.2057, -0.4733, -0.3218, -0.4119, -0.3013, -0.4573, -0.2091,
        -0.1882, -0.4417, -0.2076, -

Tokens embeddings:
tensor([-0.3412, -0.2057, -0.4733, -0.3218, -0.4119, -0.3013, -0.4573, -0.2091,
        -0.1882, -0.4417, -0.2076, -0.4316, -0.4224, -0.2540, -0.0990, -0.4338,
        -0.0990, -0.4043, -0.2513, -0.5111, -0.3913, -0.1580, -0.3650, -0.4407,
        -0.3730, -0.2812, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729])
arg max of tensor([-0.3412, -0.2057, -0.4733, -0.3218, -0.4119, -0.3013, -0.4573, -0.2091,
        -0.1882, -0.4417, -0.2076, -

Tokens embeddings:
tensor([-0.3412, -0.2057, -0.4733, -0.3218, -0.4119, -0.3013, -0.4573, -0.2091,
        -0.1882, -0.4417, -0.2076, -0.4316, -0.4224, -0.2540, -0.0990, -0.4338,
        -0.0990, -0.4043, -0.2513, -0.5111, -0.3913, -0.1580, -0.3650, -0.4407,
        -0.3730, -0.2812, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729])
arg max of tensor([-0.3412, -0.2057, -0.4733, -0.3218, -0.4119, -0.3013, -0.4573, -0.2091,
        -0.1882, -0.4417, -0.2076, -

Tokens embeddings:
tensor([-0.3412, -0.2057, -0.4733, -0.3218, -0.4119, -0.3013, -0.4573, -0.2091,
        -0.1882, -0.4417, -0.2076, -0.4316, -0.4224, -0.2540, -0.0990, -0.4338,
        -0.0990, -0.4043, -0.2513, -0.5111, -0.3913, -0.1580, -0.3650, -0.4407,
        -0.3730, -0.2812, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729])
arg max of tensor([-0.3412, -0.2057, -0.4733, -0.3218, -0.4119, -0.3013, -0.4573, -0.2091,
        -0.1882, -0.4417, -0.2076, -

Tokens embeddings:
tensor([-0.3412, -0.2057, -0.4733, -0.3218, -0.4119, -0.3013, -0.4573, -0.2091,
        -0.1882, -0.4417, -0.2076, -0.4316, -0.4224, -0.2540, -0.0990, -0.4338,
        -0.0990, -0.4043, -0.2513, -0.5111, -0.3913, -0.1580, -0.3650, -0.4407,
        -0.3730, -0.2812, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729])
arg max of tensor([-0.3412, -0.2057, -0.4733, -0.3218, -0.4119, -0.3013, -0.4573, -0.2091,
        -0.1882, -0.4417, -0.2076, -

Tokens embeddings:
tensor([-0.3412, -0.2057, -0.4733, -0.3218, -0.4119, -0.3013, -0.4573, -0.2091,
        -0.1882, -0.4417, -0.2076, -0.4316, -0.4224, -0.2540, -0.0990, -0.4338,
        -0.0990, -0.4043, -0.2513, -0.5111, -0.3913, -0.1580, -0.3650, -0.4407,
        -0.3730, -0.2812, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729])
arg max of tensor([-0.3412, -0.2057, -0.4733, -0.3218, -0.4119, -0.3013, -0.4573, -0.2091,
        -0.1882, -0.4417, -0.2076, -

Tokens embeddings:
tensor([-0.3412, -0.3791, -0.0990, -0.4111, -0.2732, -0.3639, -0.3842, -0.2980,
        -0.2765, -0.4111, -0.2732, -0.3639, -0.4372, -0.4744, -0.2560, -0.5139,
        -0.4119, -0.4119, -0.3580, -0.4119, -0.4119, -0.4154, -0.0830, -0.5196,
        -0.4378, -0.3842, -0.4744, -0.3429, -0.3862, -0.4337, -0.2076, -0.3869,
        -0.3227, -0.4621, -0.3541, -0.2098, -0.3920, -0.2812, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729])
arg max of tensor([-0.3412, -0.3791, -0.0990, -0.4111, -0.2732, -0.3639, -0.3842, -0.2980,
        -0.2765, -0.4111, -0.2732, -

Tokens embeddings:
tensor([-0.3412, -0.3791, -0.0990, -0.4111, -0.2732, -0.3639, -0.3842, -0.2980,
        -0.2765, -0.4111, -0.2732, -0.3639, -0.4372, -0.4744, -0.2560, -0.5139,
        -0.4119, -0.4119, -0.3580, -0.4119, -0.4119, -0.4154, -0.0830, -0.5196,
        -0.4378, -0.3842, -0.4744, -0.3429, -0.3862, -0.4337, -0.2076, -0.3869,
        -0.3227, -0.4621, -0.3541, -0.2098, -0.3920, -0.2812, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729])
arg max of tensor([-0.3412, -0.3791, -0.0990, -0.4111, -0.2732, -0.3639, -0.3842, -0.2980,
        -0.2765, -0.4111, -0.2732, -

Tokens embeddings:
tensor([-0.3412, -0.3791, -0.0990, -0.4111, -0.2732, -0.3639, -0.3842, -0.2980,
        -0.2765, -0.4111, -0.2732, -0.3639, -0.4372, -0.4744, -0.2560, -0.5139,
        -0.4119, -0.4119, -0.3580, -0.4119, -0.4119, -0.4154, -0.0830, -0.5196,
        -0.4378, -0.3842, -0.4744, -0.3429, -0.3862, -0.4337, -0.2076, -0.3869,
        -0.3227, -0.4621, -0.3541, -0.2098, -0.3920, -0.2812, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729])
arg max of tensor([-0.3412, -0.3791, -0.0990, -0.4111, -0.2732, -0.3639, -0.3842, -0.2980,
        -0.2765, -0.4111, -0.2732, -

Tokens embeddings:
tensor([-0.3412, -0.3791, -0.0990, -0.4111, -0.2732, -0.3639, -0.3842, -0.2980,
        -0.2765, -0.4111, -0.2732, -0.3639, -0.4372, -0.4744, -0.2560, -0.5139,
        -0.4119, -0.4119, -0.3580, -0.4119, -0.4119, -0.4154, -0.0830, -0.5196,
        -0.4378, -0.3842, -0.4744, -0.3429, -0.3862, -0.4337, -0.2076, -0.3869,
        -0.3227, -0.4621, -0.3541, -0.2098, -0.3920, -0.2812, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729])
arg max of tensor([-0.3412, -0.3791, -0.0990, -0.4111, -0.2732, -0.3639, -0.3842, -0.2980,
        -0.2765, -0.4111, -0.2732, -

Tokens embeddings:
tensor([-0.3412, -0.3791, -0.0990, -0.4111, -0.2732, -0.3639, -0.3842, -0.2980,
        -0.2765, -0.4111, -0.2732, -0.3639, -0.4372, -0.4744, -0.2560, -0.5139,
        -0.4119, -0.4119, -0.3580, -0.4119, -0.4119, -0.4154, -0.0830, -0.5196,
        -0.4378, -0.3842, -0.4744, -0.3429, -0.3862, -0.4337, -0.2076, -0.3869,
        -0.3227, -0.4621, -0.3541, -0.2098, -0.3920, -0.2812, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729])
arg max of tensor([-0.3412, -0.3791, -0.0990, -0.4111, -0.2732, -0.3639, -0.3842, -0.2980,
        -0.2765, -0.4111, -0.2732, -

Tokens embeddings:
tensor([-0.3412, -0.3791, -0.0990, -0.4111, -0.2732, -0.3639, -0.3842, -0.2980,
        -0.2765, -0.4111, -0.2732, -0.3639, -0.4372, -0.4744, -0.2560, -0.5139,
        -0.4119, -0.4119, -0.3580, -0.4119, -0.4119, -0.4154, -0.0830, -0.5196,
        -0.4378, -0.3842, -0.4744, -0.3429, -0.3862, -0.4337, -0.2076, -0.3869,
        -0.3227, -0.4621, -0.3541, -0.2098, -0.3920, -0.2812, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729])
arg max of tensor([-0.3412, -0.3791, -0.0990, -0.4111, -0.2732, -0.3639, -0.3842, -0.2980,
        -0.2765, -0.4111, -0.2732, -

Tokens embeddings:
tensor([-0.3412, -0.3791, -0.0990, -0.4111, -0.2732, -0.3639, -0.3842, -0.2980,
        -0.2765, -0.4111, -0.2732, -0.3639, -0.4372, -0.4744, -0.2560, -0.5139,
        -0.4119, -0.4119, -0.3580, -0.4119, -0.4119, -0.4154, -0.0830, -0.5196,
        -0.4378, -0.3842, -0.4744, -0.3429, -0.3862, -0.4337, -0.2076, -0.3869,
        -0.3227, -0.4621, -0.3541, -0.2098, -0.3920, -0.2812, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729])
arg max of tensor([-0.3412, -0.3791, -0.0990, -0.4111, -0.2732, -0.3639, -0.3842, -0.2980,
        -0.2765, -0.4111, -0.2732, -

Tokens embeddings:
tensor([-0.3412, -0.3791, -0.0990, -0.4111, -0.2732, -0.3639, -0.3842, -0.2980,
        -0.2765, -0.4111, -0.2732, -0.3639, -0.4372, -0.4744, -0.2560, -0.5139,
        -0.4119, -0.4119, -0.3580, -0.4119, -0.4119, -0.4154, -0.0830, -0.5196,
        -0.4378, -0.3842, -0.4744, -0.3429, -0.3862, -0.4337, -0.2076, -0.3869,
        -0.3227, -0.4621, -0.3541, -0.2098, -0.3920, -0.2812, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729])
arg max of tensor([-0.3412, -0.3791, -0.0990, -0.4111, -0.2732, -0.3639, -0.3842, -0.2980,
        -0.2765, -0.4111, -0.2732, -

Tokens embeddings:
tensor([-0.3412, -0.3791, -0.0990, -0.4111, -0.2732, -0.3639, -0.3842, -0.2980,
        -0.2765, -0.4111, -0.2732, -0.3639, -0.4372, -0.4744, -0.2560, -0.5139,
        -0.4119, -0.4119, -0.3580, -0.4119, -0.4119, -0.4154, -0.0830, -0.5196,
        -0.4378, -0.3842, -0.4744, -0.3429, -0.3862, -0.4337, -0.2076, -0.3869,
        -0.3227, -0.4621, -0.3541, -0.2098, -0.3920, -0.2812, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729])
arg max of tensor([-0.3412, -0.3791, -0.0990, -0.4111, -0.2732, -0.3639, -0.3842, -0.2980,
        -0.2765, -0.4111, -0.2732, -

Tokens embeddings:
tensor([-0.3412, -0.3791, -0.0990, -0.4111, -0.2732, -0.3639, -0.3842, -0.2980,
        -0.2765, -0.4111, -0.2732, -0.3639, -0.4372, -0.4744, -0.2560, -0.5139,
        -0.4119, -0.4119, -0.3580, -0.4119, -0.4119, -0.4154, -0.0830, -0.5196,
        -0.4378, -0.3842, -0.4744, -0.3429, -0.3862, -0.4337, -0.2076, -0.3869,
        -0.3227, -0.4621, -0.3541, -0.2098, -0.3920, -0.2812, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729])
arg max of tensor([-0.3412, -0.3791, -0.0990, -0.4111, -0.2732, -0.3639, -0.3842, -0.2980,
        -0.2765, -0.4111, -0.2732, -

Tokens embeddings:
tensor([-0.3412, -0.3791, -0.0990, -0.4111, -0.2732, -0.3639, -0.3842, -0.2980,
        -0.2765, -0.4111, -0.2732, -0.3639, -0.4372, -0.4744, -0.2560, -0.5139,
        -0.4119, -0.4119, -0.3580, -0.4119, -0.4119, -0.4154, -0.0830, -0.5196,
        -0.4378, -0.3842, -0.4744, -0.3429, -0.3862, -0.4337, -0.2076, -0.3869,
        -0.3227, -0.4621, -0.3541, -0.2098, -0.3920, -0.2812, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729])
arg max of tensor([-0.3412, -0.3791, -0.0990, -0.4111, -0.2732, -0.3639, -0.3842, -0.2980,
        -0.2765, -0.4111, -0.2732, -

Tokens embeddings:
tensor([-0.3412, -0.3791, -0.0990, -0.4111, -0.2732, -0.3639, -0.3842, -0.2980,
        -0.2765, -0.4111, -0.2732, -0.3639, -0.4372, -0.4744, -0.2560, -0.5139,
        -0.4119, -0.4119, -0.3580, -0.4119, -0.4119, -0.4154, -0.0830, -0.5196,
        -0.4378, -0.3842, -0.4744, -0.3429, -0.3862, -0.4337, -0.2076, -0.3869,
        -0.3227, -0.4621, -0.3541, -0.2098, -0.3920, -0.2812, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729])
arg max of tensor([-0.3412, -0.3791, -0.0990, -0.4111, -0.2732, -0.3639, -0.3842, -0.2980,
        -0.2765, -0.4111, -0.2732, -

Tokens embeddings:
tensor([-0.3412, -0.5140, -0.1932, -0.4258, -0.4744, -0.3387, -0.2745, -0.3888,
        -0.3842, -0.4224, -0.3416, -0.2910, -0.2498, -0.2921, -0.2584, -0.3519,
        -0.2218, -0.4070, -0.5442, -0.4744, -0.4082, -0.3741, -0.3412, -0.4811,
        -0.3842, -0.4077, -0.3786, -0.2812, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729])
arg max of tensor([-0.3412, -0.5140, -0.1932, -0.4258, -0.4744, -0.3387, -0.2745, -0.3888,
        -0.3842, -0.4224, -0.3416, -

Tokens embeddings:
tensor([-0.3412, -0.5140, -0.1932, -0.4258, -0.4744, -0.3387, -0.2745, -0.3888,
        -0.3842, -0.4224, -0.3416, -0.2910, -0.2498, -0.2921, -0.2584, -0.3519,
        -0.2218, -0.4070, -0.5442, -0.4744, -0.4082, -0.3741, -0.3412, -0.4811,
        -0.3842, -0.4077, -0.3786, -0.2812, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729])
arg max of tensor([-0.3412, -0.5140, -0.1932, -0.4258, -0.4744, -0.3387, -0.2745, -0.3888,
        -0.3842, -0.4224, -0.3416, -

Tokens embeddings:
tensor([-0.3412, -0.5140, -0.1932, -0.4258, -0.4744, -0.3387, -0.2745, -0.3888,
        -0.3842, -0.4224, -0.3416, -0.2910, -0.2498, -0.2921, -0.2584, -0.3519,
        -0.2218, -0.4070, -0.5442, -0.4744, -0.4082, -0.3741, -0.3412, -0.4811,
        -0.3842, -0.4077, -0.3786, -0.2812, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729])
arg max of tensor([-0.3412, -0.5140, -0.1932, -0.4258, -0.4744, -0.3387, -0.2745, -0.3888,
        -0.3842, -0.4224, -0.3416, -

Tokens embeddings:
tensor([-0.3412, -0.5140, -0.1932, -0.4258, -0.4744, -0.3387, -0.2745, -0.3888,
        -0.3842, -0.4224, -0.3416, -0.2910, -0.2498, -0.2921, -0.2584, -0.3519,
        -0.2218, -0.4070, -0.5442, -0.4744, -0.4082, -0.3741, -0.3412, -0.4811,
        -0.3842, -0.4077, -0.3786, -0.2812, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729])
arg max of tensor([-0.3412, -0.5140, -0.1932, -0.4258, -0.4744, -0.3387, -0.2745, -0.3888,
        -0.3842, -0.4224, -0.3416, -

Tokens embeddings:
tensor([-0.3412, -0.5140, -0.1932, -0.4258, -0.4744, -0.3387, -0.2745, -0.3888,
        -0.3842, -0.4224, -0.3416, -0.2910, -0.2498, -0.2921, -0.2584, -0.3519,
        -0.2218, -0.4070, -0.5442, -0.4744, -0.4082, -0.3741, -0.3412, -0.4811,
        -0.3842, -0.4077, -0.3786, -0.2812, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729])
arg max of tensor([-0.3412, -0.5140, -0.1932, -0.4258, -0.4744, -0.3387, -0.2745, -0.3888,
        -0.3842, -0.4224, -0.3416, -

Tokens embeddings:
tensor([-0.3412, -0.5140, -0.1932, -0.4258, -0.4744, -0.3387, -0.2745, -0.3888,
        -0.3842, -0.4224, -0.3416, -0.2910, -0.2498, -0.2921, -0.2584, -0.3519,
        -0.2218, -0.4070, -0.5442, -0.4744, -0.4082, -0.3741, -0.3412, -0.4811,
        -0.3842, -0.4077, -0.3786, -0.2812, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729])
arg max of tensor([-0.3412, -0.5140, -0.1932, -0.4258, -0.4744, -0.3387, -0.2745, -0.3888,
        -0.3842, -0.4224, -0.3416, -

Tokens embeddings:
tensor([-0.3412, -0.5140, -0.1932, -0.4258, -0.4744, -0.3387, -0.2745, -0.3888,
        -0.3842, -0.4224, -0.3416, -0.2910, -0.2498, -0.2921, -0.2584, -0.3519,
        -0.2218, -0.4070, -0.5442, -0.4744, -0.4082, -0.3741, -0.3412, -0.4811,
        -0.3842, -0.4077, -0.3786, -0.2812, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729])
arg max of tensor([-0.3412, -0.5140, -0.1932, -0.4258, -0.4744, -0.3387, -0.2745, -0.3888,
        -0.3842, -0.4224, -0.3416, -

Tokens embeddings:
tensor([-0.3412, -0.5140, -0.1932, -0.4258, -0.4744, -0.3387, -0.2745, -0.3888,
        -0.3842, -0.4224, -0.3416, -0.2910, -0.2498, -0.2921, -0.2584, -0.3519,
        -0.2218, -0.4070, -0.5442, -0.4744, -0.4082, -0.3741, -0.3412, -0.4811,
        -0.3842, -0.4077, -0.3786, -0.2812, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729])
arg max of tensor([-0.3412, -0.5140, -0.1932, -0.4258, -0.4744, -0.3387, -0.2745, -0.3888,
        -0.3842, -0.4224, -0.3416, -

Tokens embeddings:
tensor([-0.3412, -0.5140, -0.1932, -0.4258, -0.4744, -0.3387, -0.2745, -0.3888,
        -0.3842, -0.4224, -0.3416, -0.2910, -0.2498, -0.2921, -0.2584, -0.3519,
        -0.2218, -0.4070, -0.5442, -0.4744, -0.4082, -0.3741, -0.3412, -0.4811,
        -0.3842, -0.4077, -0.3786, -0.2812, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729])
arg max of tensor([-0.3412, -0.5140, -0.1932, -0.4258, -0.4744, -0.3387, -0.2745, -0.3888,
        -0.3842, -0.4224, -0.3416, -

Tokens embeddings:
tensor([-0.3412, -0.5140, -0.1932, -0.4258, -0.4744, -0.3387, -0.2745, -0.3888,
        -0.3842, -0.4224, -0.3416, -0.2910, -0.2498, -0.2921, -0.2584, -0.3519,
        -0.2218, -0.4070, -0.5442, -0.4744, -0.4082, -0.3741, -0.3412, -0.4811,
        -0.3842, -0.4077, -0.3786, -0.2812, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729])
arg max of tensor([-0.3412, -0.5140, -0.1932, -0.4258, -0.4744, -0.3387, -0.2745, -0.3888,
        -0.3842, -0.4224, -0.3416, -

Tokens embeddings:
tensor([-0.3412, -0.5140, -0.1932, -0.4258, -0.4744, -0.3387, -0.2745, -0.3888,
        -0.3842, -0.4224, -0.3416, -0.2910, -0.2498, -0.2921, -0.2584, -0.3519,
        -0.2218, -0.4070, -0.5442, -0.4744, -0.4082, -0.3741, -0.3412, -0.4811,
        -0.3842, -0.4077, -0.3786, -0.2812, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729])
arg max of tensor([-0.3412, -0.5140, -0.1932, -0.4258, -0.4744, -0.3387, -0.2745, -0.3888,
        -0.3842, -0.4224, -0.3416, -

Tokens embeddings:
tensor([-0.3412, -0.5140, -0.1932, -0.4258, -0.4744, -0.3387, -0.2745, -0.3888,
        -0.3842, -0.4224, -0.3416, -0.2910, -0.2498, -0.2921, -0.2584, -0.3519,
        -0.2218, -0.4070, -0.5442, -0.4744, -0.4082, -0.3741, -0.3412, -0.4811,
        -0.3842, -0.4077, -0.3786, -0.2812, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729])
arg max of tensor([-0.3412, -0.5140, -0.1932, -0.4258, -0.4744, -0.3387, -0.2745, -0.3888,
        -0.3842, -0.4224, -0.3416, -

Tokens embeddings:
tensor([-0.3412, -0.4320, -0.3826, -0.4580, -0.2609, -0.3865, -0.4365, -0.3640,
        -0.3183, -0.2837, -0.2076, -0.4744, -0.1859, -0.3609, -0.3630, -0.3292,
        -0.3827, -0.2723, -0.1784, -0.2910, -0.4224, -0.3609, -0.2812, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729])
arg max of tensor([-0.3412, -0.4320, -0.3826, -0.4580, -0.2609, -0.3865, -0.4365, -0.3640,
        -0.3183, -0.2837, -0.2076, -

Tokens embeddings:
tensor([-0.3412, -0.4320, -0.3826, -0.4580, -0.2609, -0.3865, -0.4365, -0.3640,
        -0.3183, -0.2837, -0.2076, -0.4744, -0.1859, -0.3609, -0.3630, -0.3292,
        -0.3827, -0.2723, -0.1784, -0.2910, -0.4224, -0.3609, -0.2812, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729])
arg max of tensor([-0.3412, -0.4320, -0.3826, -0.4580, -0.2609, -0.3865, -0.4365, -0.3640,
        -0.3183, -0.2837, -0.2076, -

Tokens embeddings:
tensor([-0.3412, -0.4320, -0.3826, -0.4580, -0.2609, -0.3865, -0.4365, -0.3640,
        -0.3183, -0.2837, -0.2076, -0.4744, -0.1859, -0.3609, -0.3630, -0.3292,
        -0.3827, -0.2723, -0.1784, -0.2910, -0.4224, -0.3609, -0.2812, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729])
arg max of tensor([-0.3412, -0.4320, -0.3826, -0.4580, -0.2609, -0.3865, -0.4365, -0.3640,
        -0.3183, -0.2837, -0.2076, -

Tokens embeddings:
tensor([-0.3412, -0.4320, -0.3826, -0.4580, -0.2609, -0.3865, -0.4365, -0.3640,
        -0.3183, -0.2837, -0.2076, -0.4744, -0.1859, -0.3609, -0.3630, -0.3292,
        -0.3827, -0.2723, -0.1784, -0.2910, -0.4224, -0.3609, -0.2812, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729])
arg max of tensor([-0.3412, -0.4320, -0.3826, -0.4580, -0.2609, -0.3865, -0.4365, -0.3640,
        -0.3183, -0.2837, -0.2076, -

Tokens embeddings:
tensor([-0.3412, -0.4320, -0.3826, -0.4580, -0.2609, -0.3865, -0.4365, -0.3640,
        -0.3183, -0.2837, -0.2076, -0.4744, -0.1859, -0.3609, -0.3630, -0.3292,
        -0.3827, -0.2723, -0.1784, -0.2910, -0.4224, -0.3609, -0.2812, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729])
arg max of tensor([-0.3412, -0.4320, -0.3826, -0.4580, -0.2609, -0.3865, -0.4365, -0.3640,
        -0.3183, -0.2837, -0.2076, -

Tokens embeddings:
tensor([-0.3412, -0.4320, -0.3826, -0.4580, -0.2609, -0.3865, -0.4365, -0.3640,
        -0.3183, -0.2837, -0.2076, -0.4744, -0.1859, -0.3609, -0.3630, -0.3292,
        -0.3827, -0.2723, -0.1784, -0.2910, -0.4224, -0.3609, -0.2812, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729])
arg max of tensor([-0.3412, -0.4320, -0.3826, -0.4580, -0.2609, -0.3865, -0.4365, -0.3640,
        -0.3183, -0.2837, -0.2076, -

Tokens embeddings:
tensor([-0.3412, -0.4320, -0.3826, -0.4580, -0.2609, -0.3865, -0.4365, -0.3640,
        -0.3183, -0.2837, -0.2076, -0.4744, -0.1859, -0.3609, -0.3630, -0.3292,
        -0.3827, -0.2723, -0.1784, -0.2910, -0.4224, -0.3609, -0.2812, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729])
arg max of tensor([-0.3412, -0.4320, -0.3826, -0.4580, -0.2609, -0.3865, -0.4365, -0.3640,
        -0.3183, -0.2837, -0.2076, -

Tokens embeddings:
tensor([-0.3412, -0.4320, -0.3826, -0.4580, -0.2609, -0.3865, -0.4365, -0.3640,
        -0.3183, -0.2837, -0.2076, -0.4744, -0.1859, -0.3609, -0.3630, -0.3292,
        -0.3827, -0.2723, -0.1784, -0.2910, -0.4224, -0.3609, -0.2812, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729])
arg max of tensor([-0.3412, -0.4320, -0.3826, -0.4580, -0.2609, -0.3865, -0.4365, -0.3640,
        -0.3183, -0.2837, -0.2076, -

Tokens embeddings:
tensor([-0.3412, -0.4320, -0.3826, -0.4580, -0.2609, -0.3865, -0.4365, -0.3640,
        -0.3183, -0.2837, -0.2076, -0.4744, -0.1859, -0.3609, -0.3630, -0.3292,
        -0.3827, -0.2723, -0.1784, -0.2910, -0.4224, -0.3609, -0.2812, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729])
arg max of tensor([-0.3412, -0.4320, -0.3826, -0.4580, -0.2609, -0.3865, -0.4365, -0.3640,
        -0.3183, -0.2837, -0.2076, -

Tokens embeddings:
tensor([-0.3412, -0.4320, -0.3826, -0.4580, -0.2609, -0.3865, -0.4365, -0.3640,
        -0.3183, -0.2837, -0.2076, -0.4744, -0.1859, -0.3609, -0.3630, -0.3292,
        -0.3827, -0.2723, -0.1784, -0.2910, -0.4224, -0.3609, -0.2812, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729])
arg max of tensor([-0.3412, -0.4320, -0.3826, -0.4580, -0.2609, -0.3865, -0.4365, -0.3640,
        -0.3183, -0.2837, -0.2076, -

Tokens embeddings:
tensor([-0.3412, -0.4320, -0.3826, -0.4580, -0.2609, -0.3865, -0.4365, -0.3640,
        -0.3183, -0.2837, -0.2076, -0.4744, -0.1859, -0.3609, -0.3630, -0.3292,
        -0.3827, -0.2723, -0.1784, -0.2910, -0.4224, -0.3609, -0.2812, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729])
arg max of tensor([-0.3412, -0.4320, -0.3826, -0.4580, -0.2609, -0.3865, -0.4365, -0.3640,
        -0.3183, -0.2837, -0.2076, -

Tokens embeddings:
tensor([-0.3412, -0.4320, -0.3826, -0.4580, -0.2609, -0.3865, -0.4365, -0.3640,
        -0.3183, -0.2837, -0.2076, -0.4744, -0.1859, -0.3609, -0.3630, -0.3292,
        -0.3827, -0.2723, -0.1784, -0.2910, -0.4224, -0.3609, -0.2812, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729])
arg max of tensor([-0.3412, -0.4320, -0.3826, -0.4580, -0.2609, -0.3865, -0.4365, -0.3640,
        -0.3183, -0.2837, -0.2076, -

Tokens embeddings:
tensor([-0.3412, -0.3802, -0.3025, -0.3420, -0.3578, -0.4477, -0.3507, -0.3786,
        -0.2076, -0.4744, -0.2245, -0.3940, -0.2306, -0.3342, -0.2336, -0.3741,
        -0.3528, -0.4642, -0.3919, -0.3842, -0.4224, -0.3715, -0.4403, -0.2812,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729])
arg max of tensor([-0.3412, -0.3802, -0.3025, -0.3420, -0.3578, -0.4477, -0.3507, -0.3786,
        -0.2076, -0.4744, -0.2245, -

Tokens embeddings:
tensor([-0.3412, -0.3802, -0.3025, -0.3420, -0.3578, -0.4477, -0.3507, -0.3786,
        -0.2076, -0.4744, -0.2245, -0.3940, -0.2306, -0.3342, -0.2336, -0.3741,
        -0.3528, -0.4642, -0.3919, -0.3842, -0.4224, -0.3715, -0.4403, -0.2812,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729])
arg max of tensor([-0.3412, -0.3802, -0.3025, -0.3420, -0.3578, -0.4477, -0.3507, -0.3786,
        -0.2076, -0.4744, -0.2245, -

Tokens embeddings:
tensor([-0.3412, -0.3802, -0.3025, -0.3420, -0.3578, -0.4477, -0.3507, -0.3786,
        -0.2076, -0.4744, -0.2245, -0.3940, -0.2306, -0.3342, -0.2336, -0.3741,
        -0.3528, -0.4642, -0.3919, -0.3842, -0.4224, -0.3715, -0.4403, -0.2812,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729])
arg max of tensor([-0.3412, -0.3802, -0.3025, -0.3420, -0.3578, -0.4477, -0.3507, -0.3786,
        -0.2076, -0.4744, -0.2245, -

Tokens embeddings:
tensor([-0.3412, -0.3802, -0.3025, -0.3420, -0.3578, -0.4477, -0.3507, -0.3786,
        -0.2076, -0.4744, -0.2245, -0.3940, -0.2306, -0.3342, -0.2336, -0.3741,
        -0.3528, -0.4642, -0.3919, -0.3842, -0.4224, -0.3715, -0.4403, -0.2812,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729])
arg max of tensor([-0.3412, -0.3802, -0.3025, -0.3420, -0.3578, -0.4477, -0.3507, -0.3786,
        -0.2076, -0.4744, -0.2245, -

Tokens embeddings:
tensor([-0.3412, -0.3802, -0.3025, -0.3420, -0.3578, -0.4477, -0.3507, -0.3786,
        -0.2076, -0.4744, -0.2245, -0.3940, -0.2306, -0.3342, -0.2336, -0.3741,
        -0.3528, -0.4642, -0.3919, -0.3842, -0.4224, -0.3715, -0.4403, -0.2812,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729])
arg max of tensor([-0.3412, -0.3802, -0.3025, -0.3420, -0.3578, -0.4477, -0.3507, -0.3786,
        -0.2076, -0.4744, -0.2245, -

Tokens embeddings:
tensor([-0.3412, -0.3802, -0.3025, -0.3420, -0.3578, -0.4477, -0.3507, -0.3786,
        -0.2076, -0.4744, -0.2245, -0.3940, -0.2306, -0.3342, -0.2336, -0.3741,
        -0.3528, -0.4642, -0.3919, -0.3842, -0.4224, -0.3715, -0.4403, -0.2812,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729])
arg max of tensor([-0.3412, -0.3802, -0.3025, -0.3420, -0.3578, -0.4477, -0.3507, -0.3786,
        -0.2076, -0.4744, -0.2245, -

Tokens embeddings:
tensor([-0.3412, -0.3802, -0.3025, -0.3420, -0.3578, -0.4477, -0.3507, -0.3786,
        -0.2076, -0.4744, -0.2245, -0.3940, -0.2306, -0.3342, -0.2336, -0.3741,
        -0.3528, -0.4642, -0.3919, -0.3842, -0.4224, -0.3715, -0.4403, -0.2812,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729])
arg max of tensor([-0.3412, -0.3802, -0.3025, -0.3420, -0.3578, -0.4477, -0.3507, -0.3786,
        -0.2076, -0.4744, -0.2245, -

Tokens embeddings:
tensor([-0.3412, -0.3802, -0.3025, -0.3420, -0.3578, -0.4477, -0.3507, -0.3786,
        -0.2076, -0.4744, -0.2245, -0.3940, -0.2306, -0.3342, -0.2336, -0.3741,
        -0.3528, -0.4642, -0.3919, -0.3842, -0.4224, -0.3715, -0.4403, -0.2812,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729])
arg max of tensor([-0.3412, -0.3802, -0.3025, -0.3420, -0.3578, -0.4477, -0.3507, -0.3786,
        -0.2076, -0.4744, -0.2245, -

Tokens embeddings:
tensor([-0.3412, -0.3802, -0.3025, -0.3420, -0.3578, -0.4477, -0.3507, -0.3786,
        -0.2076, -0.4744, -0.2245, -0.3940, -0.2306, -0.3342, -0.2336, -0.3741,
        -0.3528, -0.4642, -0.3919, -0.3842, -0.4224, -0.3715, -0.4403, -0.2812,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729])
arg max of tensor([-0.3412, -0.3802, -0.3025, -0.3420, -0.3578, -0.4477, -0.3507, -0.3786,
        -0.2076, -0.4744, -0.2245, -

Tokens embeddings:
tensor([-0.3412, -0.3802, -0.3025, -0.3420, -0.3578, -0.4477, -0.3507, -0.3786,
        -0.2076, -0.4744, -0.2245, -0.3940, -0.2306, -0.3342, -0.2336, -0.3741,
        -0.3528, -0.4642, -0.3919, -0.3842, -0.4224, -0.3715, -0.4403, -0.2812,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729])
arg max of tensor([-0.3412, -0.3802, -0.3025, -0.3420, -0.3578, -0.4477, -0.3507, -0.3786,
        -0.2076, -0.4744, -0.2245, -

Tokens embeddings:
tensor([-0.3412, -0.3802, -0.3025, -0.3420, -0.3578, -0.4477, -0.3507, -0.3786,
        -0.2076, -0.4744, -0.2245, -0.3940, -0.2306, -0.3342, -0.2336, -0.3741,
        -0.3528, -0.4642, -0.3919, -0.3842, -0.4224, -0.3715, -0.4403, -0.2812,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729])
arg max of tensor([-0.3412, -0.3802, -0.3025, -0.3420, -0.3578, -0.4477, -0.3507, -0.3786,
        -0.2076, -0.4744, -0.2245, -

Tokens embeddings:
tensor([-0.3412, -0.3802, -0.3025, -0.3420, -0.3578, -0.4477, -0.3507, -0.3786,
        -0.2076, -0.4744, -0.2245, -0.3940, -0.2306, -0.3342, -0.2336, -0.3741,
        -0.3528, -0.4642, -0.3919, -0.3842, -0.4224, -0.3715, -0.4403, -0.2812,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729])
arg max of tensor([-0.3412, -0.3802, -0.3025, -0.3420, -0.3578, -0.4477, -0.3507, -0.3786,
        -0.2076, -0.4744, -0.2245, -

Tokens embeddings:
tensor([-0.3412, -0.4442, -0.2076, -0.3788, -0.3598, -0.3218, -0.4119, -0.3013,
        -0.3063, -0.3741, -0.4744, -0.3846, -0.2836, -0.3489, -0.4068, -0.4938,
        -0.3802, -0.4612, -0.3548, -0.3814, -0.4744, -0.5131, -0.3741, -0.3890,
        -0.4744, -0.3081, -0.0990, -0.0990, -0.0990, -0.2812, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729])
arg max of tensor([-0.3412, -0.4442, -0.2076, -0.3788, -0.3598, -0.3218, -0.4119, -0.3013,
        -0.3063, -0.3741, -0.4744, -

Tokens embeddings:
tensor([-0.3412, -0.4442, -0.2076, -0.3788, -0.3598, -0.3218, -0.4119, -0.3013,
        -0.3063, -0.3741, -0.4744, -0.3846, -0.2836, -0.3489, -0.4068, -0.4938,
        -0.3802, -0.4612, -0.3548, -0.3814, -0.4744, -0.5131, -0.3741, -0.3890,
        -0.4744, -0.3081, -0.0990, -0.0990, -0.0990, -0.2812, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729])
arg max of tensor([-0.3412, -0.4442, -0.2076, -0.3788, -0.3598, -0.3218, -0.4119, -0.3013,
        -0.3063, -0.3741, -0.4744, -

Tokens embeddings:
tensor([-0.3412, -0.4442, -0.2076, -0.3788, -0.3598, -0.3218, -0.4119, -0.3013,
        -0.3063, -0.3741, -0.4744, -0.3846, -0.2836, -0.3489, -0.4068, -0.4938,
        -0.3802, -0.4612, -0.3548, -0.3814, -0.4744, -0.5131, -0.3741, -0.3890,
        -0.4744, -0.3081, -0.0990, -0.0990, -0.0990, -0.2812, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729])
arg max of tensor([-0.3412, -0.4442, -0.2076, -0.3788, -0.3598, -0.3218, -0.4119, -0.3013,
        -0.3063, -0.3741, -0.4744, -

Tokens embeddings:
tensor([-0.3412, -0.4442, -0.2076, -0.3788, -0.3598, -0.3218, -0.4119, -0.3013,
        -0.3063, -0.3741, -0.4744, -0.3846, -0.2836, -0.3489, -0.4068, -0.4938,
        -0.3802, -0.4612, -0.3548, -0.3814, -0.4744, -0.5131, -0.3741, -0.3890,
        -0.4744, -0.3081, -0.0990, -0.0990, -0.0990, -0.2812, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729])
arg max of tensor([-0.3412, -0.4442, -0.2076, -0.3788, -0.3598, -0.3218, -0.4119, -0.3013,
        -0.3063, -0.3741, -0.4744, -

Tokens embeddings:
tensor([-0.3412, -0.4442, -0.2076, -0.3788, -0.3598, -0.3218, -0.4119, -0.3013,
        -0.3063, -0.3741, -0.4744, -0.3846, -0.2836, -0.3489, -0.4068, -0.4938,
        -0.3802, -0.4612, -0.3548, -0.3814, -0.4744, -0.5131, -0.3741, -0.3890,
        -0.4744, -0.3081, -0.0990, -0.0990, -0.0990, -0.2812, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729])
arg max of tensor([-0.3412, -0.4442, -0.2076, -0.3788, -0.3598, -0.3218, -0.4119, -0.3013,
        -0.3063, -0.3741, -0.4744, -

Tokens embeddings:
tensor([-0.3412, -0.4442, -0.2076, -0.3788, -0.3598, -0.3218, -0.4119, -0.3013,
        -0.3063, -0.3741, -0.4744, -0.3846, -0.2836, -0.3489, -0.4068, -0.4938,
        -0.3802, -0.4612, -0.3548, -0.3814, -0.4744, -0.5131, -0.3741, -0.3890,
        -0.4744, -0.3081, -0.0990, -0.0990, -0.0990, -0.2812, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729])
arg max of tensor([-0.3412, -0.4442, -0.2076, -0.3788, -0.3598, -0.3218, -0.4119, -0.3013,
        -0.3063, -0.3741, -0.4744, -

Tokens embeddings:
tensor([-0.3412, -0.4442, -0.2076, -0.3788, -0.3598, -0.3218, -0.4119, -0.3013,
        -0.3063, -0.3741, -0.4744, -0.3846, -0.2836, -0.3489, -0.4068, -0.4938,
        -0.3802, -0.4612, -0.3548, -0.3814, -0.4744, -0.5131, -0.3741, -0.3890,
        -0.4744, -0.3081, -0.0990, -0.0990, -0.0990, -0.2812, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729])
arg max of tensor([-0.3412, -0.4442, -0.2076, -0.3788, -0.3598, -0.3218, -0.4119, -0.3013,
        -0.3063, -0.3741, -0.4744, -

Tokens embeddings:
tensor([-0.3412, -0.4442, -0.2076, -0.3788, -0.3598, -0.3218, -0.4119, -0.3013,
        -0.3063, -0.3741, -0.4744, -0.3846, -0.2836, -0.3489, -0.4068, -0.4938,
        -0.3802, -0.4612, -0.3548, -0.3814, -0.4744, -0.5131, -0.3741, -0.3890,
        -0.4744, -0.3081, -0.0990, -0.0990, -0.0990, -0.2812, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729])
arg max of tensor([-0.3412, -0.4442, -0.2076, -0.3788, -0.3598, -0.3218, -0.4119, -0.3013,
        -0.3063, -0.3741, -0.4744, -

Tokens embeddings:
tensor([-0.3412, -0.4442, -0.2076, -0.3788, -0.3598, -0.3218, -0.4119, -0.3013,
        -0.3063, -0.3741, -0.4744, -0.3846, -0.2836, -0.3489, -0.4068, -0.4938,
        -0.3802, -0.4612, -0.3548, -0.3814, -0.4744, -0.5131, -0.3741, -0.3890,
        -0.4744, -0.3081, -0.0990, -0.0990, -0.0990, -0.2812, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729])
arg max of tensor([-0.3412, -0.4442, -0.2076, -0.3788, -0.3598, -0.3218, -0.4119, -0.3013,
        -0.3063, -0.3741, -0.4744, -

Tokens embeddings:
tensor([-0.3412, -0.4442, -0.2076, -0.3788, -0.3598, -0.3218, -0.4119, -0.3013,
        -0.3063, -0.3741, -0.4744, -0.3846, -0.2836, -0.3489, -0.4068, -0.4938,
        -0.3802, -0.4612, -0.3548, -0.3814, -0.4744, -0.5131, -0.3741, -0.3890,
        -0.4744, -0.3081, -0.0990, -0.0990, -0.0990, -0.2812, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729])
arg max of tensor([-0.3412, -0.4442, -0.2076, -0.3788, -0.3598, -0.3218, -0.4119, -0.3013,
        -0.3063, -0.3741, -0.4744, -

Tokens embeddings:
tensor([-0.3412, -0.4442, -0.2076, -0.3788, -0.3598, -0.3218, -0.4119, -0.3013,
        -0.3063, -0.3741, -0.4744, -0.3846, -0.2836, -0.3489, -0.4068, -0.4938,
        -0.3802, -0.4612, -0.3548, -0.3814, -0.4744, -0.5131, -0.3741, -0.3890,
        -0.4744, -0.3081, -0.0990, -0.0990, -0.0990, -0.2812, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729])
arg max of tensor([-0.3412, -0.4442, -0.2076, -0.3788, -0.3598, -0.3218, -0.4119, -0.3013,
        -0.3063, -0.3741, -0.4744, -

Tokens embeddings:
tensor([-0.3412, -0.4442, -0.2076, -0.3788, -0.3598, -0.3218, -0.4119, -0.3013,
        -0.3063, -0.3741, -0.4744, -0.3846, -0.2836, -0.3489, -0.4068, -0.4938,
        -0.3802, -0.4612, -0.3548, -0.3814, -0.4744, -0.5131, -0.3741, -0.3890,
        -0.4744, -0.3081, -0.0990, -0.0990, -0.0990, -0.2812, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729])
arg max of tensor([-0.3412, -0.4442, -0.2076, -0.3788, -0.3598, -0.3218, -0.4119, -0.3013,
        -0.3063, -0.3741, -0.4744, -

Tokens embeddings:
tensor([-0.3412, -0.3822, -0.4146, -0.3226, -0.3258, -0.3668, -0.4119, -0.2779,
        -0.3273, -0.2910, -0.3044, -0.4316, -0.4612, -0.3548, -0.3741, -0.4073,
        -0.2910, -0.4744, -0.2705, -0.4894, -0.3842, -0.4224, -0.3822, -0.4508,
        -0.2076, -0.4054, -0.3218, -0.4119, -0.3013, -0.4413, -0.4744, -0.3356,
        -0.2723, -0.4505, -0.2812, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729])
arg max of tensor([-0.3412, -0.3822, -0.4146, -0.3226, -0.3258, -0.3668, -0.4119, -0.2779,
        -0.3273, -0.2910, -0.3044, -

Tokens embeddings:
tensor([-0.3412, -0.3822, -0.4146, -0.3226, -0.3258, -0.3668, -0.4119, -0.2779,
        -0.3273, -0.2910, -0.3044, -0.4316, -0.4612, -0.3548, -0.3741, -0.4073,
        -0.2910, -0.4744, -0.2705, -0.4894, -0.3842, -0.4224, -0.3822, -0.4508,
        -0.2076, -0.4054, -0.3218, -0.4119, -0.3013, -0.4413, -0.4744, -0.3356,
        -0.2723, -0.4505, -0.2812, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729])
arg max of tensor([-0.3412, -0.3822, -0.4146, -0.3226, -0.3258, -0.3668, -0.4119, -0.2779,
        -0.3273, -0.2910, -0.3044, -

Tokens embeddings:
tensor([-0.3412, -0.3822, -0.4146, -0.3226, -0.3258, -0.3668, -0.4119, -0.2779,
        -0.3273, -0.2910, -0.3044, -0.4316, -0.4612, -0.3548, -0.3741, -0.4073,
        -0.2910, -0.4744, -0.2705, -0.4894, -0.3842, -0.4224, -0.3822, -0.4508,
        -0.2076, -0.4054, -0.3218, -0.4119, -0.3013, -0.4413, -0.4744, -0.3356,
        -0.2723, -0.4505, -0.2812, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729])
arg max of tensor([-0.3412, -0.3822, -0.4146, -0.3226, -0.3258, -0.3668, -0.4119, -0.2779,
        -0.3273, -0.2910, -0.3044, -

Tokens embeddings:
tensor([-0.3412, -0.3822, -0.4146, -0.3226, -0.3258, -0.3668, -0.4119, -0.2779,
        -0.3273, -0.2910, -0.3044, -0.4316, -0.4612, -0.3548, -0.3741, -0.4073,
        -0.2910, -0.4744, -0.2705, -0.4894, -0.3842, -0.4224, -0.3822, -0.4508,
        -0.2076, -0.4054, -0.3218, -0.4119, -0.3013, -0.4413, -0.4744, -0.3356,
        -0.2723, -0.4505, -0.2812, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729])
arg max of tensor([-0.3412, -0.3822, -0.4146, -0.3226, -0.3258, -0.3668, -0.4119, -0.2779,
        -0.3273, -0.2910, -0.3044, -

Tokens embeddings:
tensor([-0.3412, -0.3822, -0.4146, -0.3226, -0.3258, -0.3668, -0.4119, -0.2779,
        -0.3273, -0.2910, -0.3044, -0.4316, -0.4612, -0.3548, -0.3741, -0.4073,
        -0.2910, -0.4744, -0.2705, -0.4894, -0.3842, -0.4224, -0.3822, -0.4508,
        -0.2076, -0.4054, -0.3218, -0.4119, -0.3013, -0.4413, -0.4744, -0.3356,
        -0.2723, -0.4505, -0.2812, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729])
arg max of tensor([-0.3412, -0.3822, -0.4146, -0.3226, -0.3258, -0.3668, -0.4119, -0.2779,
        -0.3273, -0.2910, -0.3044, -

Tokens embeddings:
tensor([-0.3412, -0.3822, -0.4146, -0.3226, -0.3258, -0.3668, -0.4119, -0.2779,
        -0.3273, -0.2910, -0.3044, -0.4316, -0.4612, -0.3548, -0.3741, -0.4073,
        -0.2910, -0.4744, -0.2705, -0.4894, -0.3842, -0.4224, -0.3822, -0.4508,
        -0.2076, -0.4054, -0.3218, -0.4119, -0.3013, -0.4413, -0.4744, -0.3356,
        -0.2723, -0.4505, -0.2812, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729])
arg max of tensor([-0.3412, -0.3822, -0.4146, -0.3226, -0.3258, -0.3668, -0.4119, -0.2779,
        -0.3273, -0.2910, -0.3044, -

Tokens embeddings:
tensor([-0.3412, -0.3822, -0.4146, -0.3226, -0.3258, -0.3668, -0.4119, -0.2779,
        -0.3273, -0.2910, -0.3044, -0.4316, -0.4612, -0.3548, -0.3741, -0.4073,
        -0.2910, -0.4744, -0.2705, -0.4894, -0.3842, -0.4224, -0.3822, -0.4508,
        -0.2076, -0.4054, -0.3218, -0.4119, -0.3013, -0.4413, -0.4744, -0.3356,
        -0.2723, -0.4505, -0.2812, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729])
arg max of tensor([-0.3412, -0.3822, -0.4146, -0.3226, -0.3258, -0.3668, -0.4119, -0.2779,
        -0.3273, -0.2910, -0.3044, -

Tokens embeddings:
tensor([-0.3412, -0.3822, -0.4146, -0.3226, -0.3258, -0.3668, -0.4119, -0.2779,
        -0.3273, -0.2910, -0.3044, -0.4316, -0.4612, -0.3548, -0.3741, -0.4073,
        -0.2910, -0.4744, -0.2705, -0.4894, -0.3842, -0.4224, -0.3822, -0.4508,
        -0.2076, -0.4054, -0.3218, -0.4119, -0.3013, -0.4413, -0.4744, -0.3356,
        -0.2723, -0.4505, -0.2812, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729])
arg max of tensor([-0.3412, -0.3822, -0.4146, -0.3226, -0.3258, -0.3668, -0.4119, -0.2779,
        -0.3273, -0.2910, -0.3044, -

Tokens embeddings:
tensor([-0.3412, -0.3822, -0.4146, -0.3226, -0.3258, -0.3668, -0.4119, -0.2779,
        -0.3273, -0.2910, -0.3044, -0.4316, -0.4612, -0.3548, -0.3741, -0.4073,
        -0.2910, -0.4744, -0.2705, -0.4894, -0.3842, -0.4224, -0.3822, -0.4508,
        -0.2076, -0.4054, -0.3218, -0.4119, -0.3013, -0.4413, -0.4744, -0.3356,
        -0.2723, -0.4505, -0.2812, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729])
arg max of tensor([-0.3412, -0.3822, -0.4146, -0.3226, -0.3258, -0.3668, -0.4119, -0.2779,
        -0.3273, -0.2910, -0.3044, -

Tokens embeddings:
tensor([-0.3412, -0.3822, -0.4146, -0.3226, -0.3258, -0.3668, -0.4119, -0.2779,
        -0.3273, -0.2910, -0.3044, -0.4316, -0.4612, -0.3548, -0.3741, -0.4073,
        -0.2910, -0.4744, -0.2705, -0.4894, -0.3842, -0.4224, -0.3822, -0.4508,
        -0.2076, -0.4054, -0.3218, -0.4119, -0.3013, -0.4413, -0.4744, -0.3356,
        -0.2723, -0.4505, -0.2812, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729])
arg max of tensor([-0.3412, -0.3822, -0.4146, -0.3226, -0.3258, -0.3668, -0.4119, -0.2779,
        -0.3273, -0.2910, -0.3044, -

Tokens embeddings:
tensor([-0.3412, -0.3822, -0.4146, -0.3226, -0.3258, -0.3668, -0.4119, -0.2779,
        -0.3273, -0.2910, -0.3044, -0.4316, -0.4612, -0.3548, -0.3741, -0.4073,
        -0.2910, -0.4744, -0.2705, -0.4894, -0.3842, -0.4224, -0.3822, -0.4508,
        -0.2076, -0.4054, -0.3218, -0.4119, -0.3013, -0.4413, -0.4744, -0.3356,
        -0.2723, -0.4505, -0.2812, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729])
arg max of tensor([-0.3412, -0.3822, -0.4146, -0.3226, -0.3258, -0.3668, -0.4119, -0.2779,
        -0.3273, -0.2910, -0.3044, -

Tokens embeddings:
tensor([-0.3412, -0.3822, -0.4146, -0.3226, -0.3258, -0.3668, -0.4119, -0.2779,
        -0.3273, -0.2910, -0.3044, -0.4316, -0.4612, -0.3548, -0.3741, -0.4073,
        -0.2910, -0.4744, -0.2705, -0.4894, -0.3842, -0.4224, -0.3822, -0.4508,
        -0.2076, -0.4054, -0.3218, -0.4119, -0.3013, -0.4413, -0.4744, -0.3356,
        -0.2723, -0.4505, -0.2812, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729])
arg max of tensor([-0.3412, -0.3822, -0.4146, -0.3226, -0.3258, -0.3668, -0.4119, -0.2779,
        -0.3273, -0.2910, -0.3044, -

Tokens embeddings:
tensor([-0.3412, -0.3741, -0.4224, -0.3859, -0.3195, -0.4090, -0.2723, -0.4744,
        -0.3042, -0.2753, -0.2076, -0.4744, -0.2522, -0.2663, -0.4372, -0.3791,
        -0.0990, -0.1449, -0.1676, -0.2734, -0.2076, -0.3292, -0.4407, -0.4382,
        -0.3741, -0.4744, -0.4043, -0.2076, -0.5139, -0.4190, -0.4720, -0.4894,
        -0.3842, -0.4006, -0.4706, -0.3654, -0.3741, -0.3651, -0.4744, -0.2589,
        -0.4119, -0.4338, -0.4119, -0.4119, -0.3456, -0.3784, -0.4119, -0.4119,
        -0.3753, -0.3944, -0.2812, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729])
arg max of tensor([-0.3412, -0.3741, -0.4224, -0.3859, -0.3195, -0.4090, -0.2723, -0.4744,
        -0.3042, -0.2753, -0.2076, -

Tokens embeddings:
tensor([-0.3412, -0.3741, -0.4224, -0.3859, -0.3195, -0.4090, -0.2723, -0.4744,
        -0.3042, -0.2753, -0.2076, -0.4744, -0.2522, -0.2663, -0.4372, -0.3791,
        -0.0990, -0.1449, -0.1676, -0.2734, -0.2076, -0.3292, -0.4407, -0.4382,
        -0.3741, -0.4744, -0.4043, -0.2076, -0.5139, -0.4190, -0.4720, -0.4894,
        -0.3842, -0.4006, -0.4706, -0.3654, -0.3741, -0.3651, -0.4744, -0.2589,
        -0.4119, -0.4338, -0.4119, -0.4119, -0.3456, -0.3784, -0.4119, -0.4119,
        -0.3753, -0.3944, -0.2812, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729])
arg max of tensor([-0.3412, -0.3741, -0.4224, -0.3859, -0.3195, -0.4090, -0.2723, -0.4744,
        -0.3042, -0.2753, -0.2076, -

Tokens embeddings:
tensor([-0.3412, -0.3741, -0.4224, -0.3859, -0.3195, -0.4090, -0.2723, -0.4744,
        -0.3042, -0.2753, -0.2076, -0.4744, -0.2522, -0.2663, -0.4372, -0.3791,
        -0.0990, -0.1449, -0.1676, -0.2734, -0.2076, -0.3292, -0.4407, -0.4382,
        -0.3741, -0.4744, -0.4043, -0.2076, -0.5139, -0.4190, -0.4720, -0.4894,
        -0.3842, -0.4006, -0.4706, -0.3654, -0.3741, -0.3651, -0.4744, -0.2589,
        -0.4119, -0.4338, -0.4119, -0.4119, -0.3456, -0.3784, -0.4119, -0.4119,
        -0.3753, -0.3944, -0.2812, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729])
arg max of tensor([-0.3412, -0.3741, -0.4224, -0.3859, -0.3195, -0.4090, -0.2723, -0.4744,
        -0.3042, -0.2753, -0.2076, -

Tokens embeddings:
tensor([-0.3412, -0.3741, -0.4224, -0.3859, -0.3195, -0.4090, -0.2723, -0.4744,
        -0.3042, -0.2753, -0.2076, -0.4744, -0.2522, -0.2663, -0.4372, -0.3791,
        -0.0990, -0.1449, -0.1676, -0.2734, -0.2076, -0.3292, -0.4407, -0.4382,
        -0.3741, -0.4744, -0.4043, -0.2076, -0.5139, -0.4190, -0.4720, -0.4894,
        -0.3842, -0.4006, -0.4706, -0.3654, -0.3741, -0.3651, -0.4744, -0.2589,
        -0.4119, -0.4338, -0.4119, -0.4119, -0.3456, -0.3784, -0.4119, -0.4119,
        -0.3753, -0.3944, -0.2812, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729])
arg max of tensor([-0.3412, -0.3741, -0.4224, -0.3859, -0.3195, -0.4090, -0.2723, -0.4744,
        -0.3042, -0.2753, -0.2076, -

Tokens embeddings:
tensor([-0.3412, -0.3741, -0.4224, -0.3859, -0.3195, -0.4090, -0.2723, -0.4744,
        -0.3042, -0.2753, -0.2076, -0.4744, -0.2522, -0.2663, -0.4372, -0.3791,
        -0.0990, -0.1449, -0.1676, -0.2734, -0.2076, -0.3292, -0.4407, -0.4382,
        -0.3741, -0.4744, -0.4043, -0.2076, -0.5139, -0.4190, -0.4720, -0.4894,
        -0.3842, -0.4006, -0.4706, -0.3654, -0.3741, -0.3651, -0.4744, -0.2589,
        -0.4119, -0.4338, -0.4119, -0.4119, -0.3456, -0.3784, -0.4119, -0.4119,
        -0.3753, -0.3944, -0.2812, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729])
arg max of tensor([-0.3412, -0.3741, -0.4224, -0.3859, -0.3195, -0.4090, -0.2723, -0.4744,
        -0.3042, -0.2753, -0.2076, -

Tokens embeddings:
tensor([-0.3412, -0.3741, -0.4224, -0.3859, -0.3195, -0.4090, -0.2723, -0.4744,
        -0.3042, -0.2753, -0.2076, -0.4744, -0.2522, -0.2663, -0.4372, -0.3791,
        -0.0990, -0.1449, -0.1676, -0.2734, -0.2076, -0.3292, -0.4407, -0.4382,
        -0.3741, -0.4744, -0.4043, -0.2076, -0.5139, -0.4190, -0.4720, -0.4894,
        -0.3842, -0.4006, -0.4706, -0.3654, -0.3741, -0.3651, -0.4744, -0.2589,
        -0.4119, -0.4338, -0.4119, -0.4119, -0.3456, -0.3784, -0.4119, -0.4119,
        -0.3753, -0.3944, -0.2812, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729])
arg max of tensor([-0.3412, -0.3741, -0.4224, -0.3859, -0.3195, -0.4090, -0.2723, -0.4744,
        -0.3042, -0.2753, -0.2076, -

Tokens embeddings:
tensor([-0.3412, -0.3741, -0.4224, -0.3859, -0.3195, -0.4090, -0.2723, -0.4744,
        -0.3042, -0.2753, -0.2076, -0.4744, -0.2522, -0.2663, -0.4372, -0.3791,
        -0.0990, -0.1449, -0.1676, -0.2734, -0.2076, -0.3292, -0.4407, -0.4382,
        -0.3741, -0.4744, -0.4043, -0.2076, -0.5139, -0.4190, -0.4720, -0.4894,
        -0.3842, -0.4006, -0.4706, -0.3654, -0.3741, -0.3651, -0.4744, -0.2589,
        -0.4119, -0.4338, -0.4119, -0.4119, -0.3456, -0.3784, -0.4119, -0.4119,
        -0.3753, -0.3944, -0.2812, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729])
arg max of tensor([-0.3412, -0.3741, -0.4224, -0.3859, -0.3195, -0.4090, -0.2723, -0.4744,
        -0.3042, -0.2753, -0.2076, -

Tokens embeddings:
tensor([-0.3412, -0.3741, -0.4224, -0.3859, -0.3195, -0.4090, -0.2723, -0.4744,
        -0.3042, -0.2753, -0.2076, -0.4744, -0.2522, -0.2663, -0.4372, -0.3791,
        -0.0990, -0.1449, -0.1676, -0.2734, -0.2076, -0.3292, -0.4407, -0.4382,
        -0.3741, -0.4744, -0.4043, -0.2076, -0.5139, -0.4190, -0.4720, -0.4894,
        -0.3842, -0.4006, -0.4706, -0.3654, -0.3741, -0.3651, -0.4744, -0.2589,
        -0.4119, -0.4338, -0.4119, -0.4119, -0.3456, -0.3784, -0.4119, -0.4119,
        -0.3753, -0.3944, -0.2812, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729])
arg max of tensor([-0.3412, -0.3741, -0.4224, -0.3859, -0.3195, -0.4090, -0.2723, -0.4744,
        -0.3042, -0.2753, -0.2076, -

Tokens embeddings:
tensor([-0.3412, -0.3741, -0.4224, -0.3859, -0.3195, -0.4090, -0.2723, -0.4744,
        -0.3042, -0.2753, -0.2076, -0.4744, -0.2522, -0.2663, -0.4372, -0.3791,
        -0.0990, -0.1449, -0.1676, -0.2734, -0.2076, -0.3292, -0.4407, -0.4382,
        -0.3741, -0.4744, -0.4043, -0.2076, -0.5139, -0.4190, -0.4720, -0.4894,
        -0.3842, -0.4006, -0.4706, -0.3654, -0.3741, -0.3651, -0.4744, -0.2589,
        -0.4119, -0.4338, -0.4119, -0.4119, -0.3456, -0.3784, -0.4119, -0.4119,
        -0.3753, -0.3944, -0.2812, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729])
arg max of tensor([-0.3412, -0.3741, -0.4224, -0.3859, -0.3195, -0.4090, -0.2723, -0.4744,
        -0.3042, -0.2753, -0.2076, -

Tokens embeddings:
tensor([-0.3412, -0.3741, -0.4224, -0.3859, -0.3195, -0.4090, -0.2723, -0.4744,
        -0.3042, -0.2753, -0.2076, -0.4744, -0.2522, -0.2663, -0.4372, -0.3791,
        -0.0990, -0.1449, -0.1676, -0.2734, -0.2076, -0.3292, -0.4407, -0.4382,
        -0.3741, -0.4744, -0.4043, -0.2076, -0.5139, -0.4190, -0.4720, -0.4894,
        -0.3842, -0.4006, -0.4706, -0.3654, -0.3741, -0.3651, -0.4744, -0.2589,
        -0.4119, -0.4338, -0.4119, -0.4119, -0.3456, -0.3784, -0.4119, -0.4119,
        -0.3753, -0.3944, -0.2812, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729])
arg max of tensor([-0.3412, -0.3741, -0.4224, -0.3859, -0.3195, -0.4090, -0.2723, -0.4744,
        -0.3042, -0.2753, -0.2076, -

Tokens embeddings:
tensor([-0.3412, -0.3741, -0.4224, -0.3859, -0.3195, -0.4090, -0.2723, -0.4744,
        -0.3042, -0.2753, -0.2076, -0.4744, -0.2522, -0.2663, -0.4372, -0.3791,
        -0.0990, -0.1449, -0.1676, -0.2734, -0.2076, -0.3292, -0.4407, -0.4382,
        -0.3741, -0.4744, -0.4043, -0.2076, -0.5139, -0.4190, -0.4720, -0.4894,
        -0.3842, -0.4006, -0.4706, -0.3654, -0.3741, -0.3651, -0.4744, -0.2589,
        -0.4119, -0.4338, -0.4119, -0.4119, -0.3456, -0.3784, -0.4119, -0.4119,
        -0.3753, -0.3944, -0.2812, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729])
arg max of tensor([-0.3412, -0.3741, -0.4224, -0.3859, -0.3195, -0.4090, -0.2723, -0.4744,
        -0.3042, -0.2753, -0.2076, -

Tokens embeddings:
tensor([-0.3412, -0.3741, -0.4224, -0.3859, -0.3195, -0.4090, -0.2723, -0.4744,
        -0.3042, -0.2753, -0.2076, -0.4744, -0.2522, -0.2663, -0.4372, -0.3791,
        -0.0990, -0.1449, -0.1676, -0.2734, -0.2076, -0.3292, -0.4407, -0.4382,
        -0.3741, -0.4744, -0.4043, -0.2076, -0.5139, -0.4190, -0.4720, -0.4894,
        -0.3842, -0.4006, -0.4706, -0.3654, -0.3741, -0.3651, -0.4744, -0.2589,
        -0.4119, -0.4338, -0.4119, -0.4119, -0.3456, -0.3784, -0.4119, -0.4119,
        -0.3753, -0.3944, -0.2812, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729])
arg max of tensor([-0.3412, -0.3741, -0.4224, -0.3859, -0.3195, -0.4090, -0.2723, -0.4744,
        -0.3042, -0.2753, -0.2076, -

In [None]:
# Max Pooling for all tokens in sentences and argmax

# Select some sentences randomly
sent_index = [0,1,2,3,4,5,6,7,8,9,10]

for s in sent_index:
  print("*" * 100)
  # Get the sentence's words
  tokens = test_inputs[s]
  # For each layer...
  for l in range(12):
    print("\nLayer", l+1)
    attention = np.squeeze(test_attentions[s][l].detach().cpu().numpy(), axis=0)
    # and for each head
    #print("Head", h+1)
    # Get the attention for the cls token
    encoded_tokens = bert_tokenizer(tokens, truncation=True, padding=True, max_length=128, return_tensors='pt')
    encoded_tokens = encoded_tokens.to(device)
    with torch.no_grad():
      model_output1 = model_e(**encoded_tokens)
      tokens_embeddings = max_pooling(model_output1, encoded_tokens['attention_mask'])
      tokens_embeddings = tokens_embeddings.cpu()
      cls_attentions = head[0]
      display(HTML(colorize(tokens, cls_attentions)))
      print("Tokens embeddings:")
      print(tokens_embeddings)
      arg = argmax(tokens_embeddings)
      print('arg max of %s: %d' % (tokens_embeddings, arg))

****************************************************************************************************

Layer 1


Tokens embeddings:
tensor([-1.0233e-02, -4.3960e-01, -1.0000e+09, -3.5217e-01, -3.7311e-01,
        -1.0000e+09, -2.3131e-01, -4.2868e-01, -1.0000e+09, -3.5217e-01,
        -2.3249e-01, -1.0000e+09, -3.5217e-01, -1.3333e-01, -1.0000e+09,
        -3.2490e-01, -4.3563e-01, -1.0000e+09, -2.9826e-01, -2.2176e-01,
        -1.0000e+09, -2.8190e-01, -2.8394e-01, -1.0000e+09, -1.6137e-01,
        -4.4856e-01, -1.0000e+09, -2.7131e-01, -4.5053e-01, -1.0000e+09,
        -1.6481e-01, -4.2241e-01, -1.0000e+09, -2.8151e-01, -4.3667e-01,
        -1.0000e+09, -1.8529e-01, -6.1033e-01, -1.0000e+09, -1.3764e-01,
        -2.8116e-01, -1.0000e+09, -1.0184e-02, -3.7285e-01, -1.0000e+09,
        -1.0184e-02, -3.7285e-01, -1.0000e+09, -1.0184e-02, -3.7285e-01,
        -1.0000e+09, -1.0184e-02, -3.7285e-01, -1.0000e+09, -1.0184e-02,
        -3.7285e-01, -1.0000e+09, -1.0184e-02, -3.7285e-01, -1.0000e+09,
        -1.0184e-02, -3.7285e-01, -1.0000e+09, -1.0184e-02, -3.7285e-01,
        -1.0000e+09, -1.0184e-02

Tokens embeddings:
tensor([-1.0233e-02, -4.3960e-01, -1.0000e+09, -3.5217e-01, -3.7311e-01,
        -1.0000e+09, -2.3131e-01, -4.2868e-01, -1.0000e+09, -3.5217e-01,
        -2.3249e-01, -1.0000e+09, -3.5217e-01, -1.3333e-01, -1.0000e+09,
        -3.2490e-01, -4.3563e-01, -1.0000e+09, -2.9826e-01, -2.2176e-01,
        -1.0000e+09, -2.8190e-01, -2.8394e-01, -1.0000e+09, -1.6137e-01,
        -4.4856e-01, -1.0000e+09, -2.7131e-01, -4.5053e-01, -1.0000e+09,
        -1.6481e-01, -4.2241e-01, -1.0000e+09, -2.8151e-01, -4.3667e-01,
        -1.0000e+09, -1.8529e-01, -6.1033e-01, -1.0000e+09, -1.3764e-01,
        -2.8116e-01, -1.0000e+09, -1.0184e-02, -3.7285e-01, -1.0000e+09,
        -1.0184e-02, -3.7285e-01, -1.0000e+09, -1.0184e-02, -3.7285e-01,
        -1.0000e+09, -1.0184e-02, -3.7285e-01, -1.0000e+09, -1.0184e-02,
        -3.7285e-01, -1.0000e+09, -1.0184e-02, -3.7285e-01, -1.0000e+09,
        -1.0184e-02, -3.7285e-01, -1.0000e+09, -1.0184e-02, -3.7285e-01,
        -1.0000e+09, -1.0184e-02

Tokens embeddings:
tensor([-1.0233e-02, -4.3960e-01, -1.0000e+09, -3.5217e-01, -3.7311e-01,
        -1.0000e+09, -2.3131e-01, -4.2868e-01, -1.0000e+09, -3.5217e-01,
        -2.3249e-01, -1.0000e+09, -3.5217e-01, -1.3333e-01, -1.0000e+09,
        -3.2490e-01, -4.3563e-01, -1.0000e+09, -2.9826e-01, -2.2176e-01,
        -1.0000e+09, -2.8190e-01, -2.8394e-01, -1.0000e+09, -1.6137e-01,
        -4.4856e-01, -1.0000e+09, -2.7131e-01, -4.5053e-01, -1.0000e+09,
        -1.6481e-01, -4.2241e-01, -1.0000e+09, -2.8151e-01, -4.3667e-01,
        -1.0000e+09, -1.8529e-01, -6.1033e-01, -1.0000e+09, -1.3764e-01,
        -2.8116e-01, -1.0000e+09, -1.0184e-02, -3.7285e-01, -1.0000e+09,
        -1.0184e-02, -3.7285e-01, -1.0000e+09, -1.0184e-02, -3.7285e-01,
        -1.0000e+09, -1.0184e-02, -3.7285e-01, -1.0000e+09, -1.0184e-02,
        -3.7285e-01, -1.0000e+09, -1.0184e-02, -3.7285e-01, -1.0000e+09,
        -1.0184e-02, -3.7285e-01, -1.0000e+09, -1.0184e-02, -3.7285e-01,
        -1.0000e+09, -1.0184e-02

Tokens embeddings:
tensor([-1.0233e-02, -4.3960e-01, -1.0000e+09, -3.5217e-01, -3.7311e-01,
        -1.0000e+09, -2.3131e-01, -4.2868e-01, -1.0000e+09, -3.5217e-01,
        -2.3249e-01, -1.0000e+09, -3.5217e-01, -1.3333e-01, -1.0000e+09,
        -3.2490e-01, -4.3563e-01, -1.0000e+09, -2.9826e-01, -2.2176e-01,
        -1.0000e+09, -2.8190e-01, -2.8394e-01, -1.0000e+09, -1.6137e-01,
        -4.4856e-01, -1.0000e+09, -2.7131e-01, -4.5053e-01, -1.0000e+09,
        -1.6481e-01, -4.2241e-01, -1.0000e+09, -2.8151e-01, -4.3667e-01,
        -1.0000e+09, -1.8529e-01, -6.1033e-01, -1.0000e+09, -1.3764e-01,
        -2.8116e-01, -1.0000e+09, -1.0184e-02, -3.7285e-01, -1.0000e+09,
        -1.0184e-02, -3.7285e-01, -1.0000e+09, -1.0184e-02, -3.7285e-01,
        -1.0000e+09, -1.0184e-02, -3.7285e-01, -1.0000e+09, -1.0184e-02,
        -3.7285e-01, -1.0000e+09, -1.0184e-02, -3.7285e-01, -1.0000e+09,
        -1.0184e-02, -3.7285e-01, -1.0000e+09, -1.0184e-02, -3.7285e-01,
        -1.0000e+09, -1.0184e-02

Tokens embeddings:
tensor([-1.0233e-02, -4.3960e-01, -1.0000e+09, -3.5217e-01, -3.7311e-01,
        -1.0000e+09, -2.3131e-01, -4.2868e-01, -1.0000e+09, -3.5217e-01,
        -2.3249e-01, -1.0000e+09, -3.5217e-01, -1.3333e-01, -1.0000e+09,
        -3.2490e-01, -4.3563e-01, -1.0000e+09, -2.9826e-01, -2.2176e-01,
        -1.0000e+09, -2.8190e-01, -2.8394e-01, -1.0000e+09, -1.6137e-01,
        -4.4856e-01, -1.0000e+09, -2.7131e-01, -4.5053e-01, -1.0000e+09,
        -1.6481e-01, -4.2241e-01, -1.0000e+09, -2.8151e-01, -4.3667e-01,
        -1.0000e+09, -1.8529e-01, -6.1033e-01, -1.0000e+09, -1.3764e-01,
        -2.8116e-01, -1.0000e+09, -1.0184e-02, -3.7285e-01, -1.0000e+09,
        -1.0184e-02, -3.7285e-01, -1.0000e+09, -1.0184e-02, -3.7285e-01,
        -1.0000e+09, -1.0184e-02, -3.7285e-01, -1.0000e+09, -1.0184e-02,
        -3.7285e-01, -1.0000e+09, -1.0184e-02, -3.7285e-01, -1.0000e+09,
        -1.0184e-02, -3.7285e-01, -1.0000e+09, -1.0184e-02, -3.7285e-01,
        -1.0000e+09, -1.0184e-02

Tokens embeddings:
tensor([-1.0233e-02, -4.3960e-01, -1.0000e+09, -3.5217e-01, -3.7311e-01,
        -1.0000e+09, -2.3131e-01, -4.2868e-01, -1.0000e+09, -3.5217e-01,
        -2.3249e-01, -1.0000e+09, -3.5217e-01, -1.3333e-01, -1.0000e+09,
        -3.2490e-01, -4.3563e-01, -1.0000e+09, -2.9826e-01, -2.2176e-01,
        -1.0000e+09, -2.8190e-01, -2.8394e-01, -1.0000e+09, -1.6137e-01,
        -4.4856e-01, -1.0000e+09, -2.7131e-01, -4.5053e-01, -1.0000e+09,
        -1.6481e-01, -4.2241e-01, -1.0000e+09, -2.8151e-01, -4.3667e-01,
        -1.0000e+09, -1.8529e-01, -6.1033e-01, -1.0000e+09, -1.3764e-01,
        -2.8116e-01, -1.0000e+09, -1.0184e-02, -3.7285e-01, -1.0000e+09,
        -1.0184e-02, -3.7285e-01, -1.0000e+09, -1.0184e-02, -3.7285e-01,
        -1.0000e+09, -1.0184e-02, -3.7285e-01, -1.0000e+09, -1.0184e-02,
        -3.7285e-01, -1.0000e+09, -1.0184e-02, -3.7285e-01, -1.0000e+09,
        -1.0184e-02, -3.7285e-01, -1.0000e+09, -1.0184e-02, -3.7285e-01,
        -1.0000e+09, -1.0184e-02

Tokens embeddings:
tensor([-1.0233e-02, -4.3960e-01, -1.0000e+09, -3.5217e-01, -3.7311e-01,
        -1.0000e+09, -2.3131e-01, -4.2868e-01, -1.0000e+09, -3.5217e-01,
        -2.3249e-01, -1.0000e+09, -3.5217e-01, -1.3333e-01, -1.0000e+09,
        -3.2490e-01, -4.3563e-01, -1.0000e+09, -2.9826e-01, -2.2176e-01,
        -1.0000e+09, -2.8190e-01, -2.8394e-01, -1.0000e+09, -1.6137e-01,
        -4.4856e-01, -1.0000e+09, -2.7131e-01, -4.5053e-01, -1.0000e+09,
        -1.6481e-01, -4.2241e-01, -1.0000e+09, -2.8151e-01, -4.3667e-01,
        -1.0000e+09, -1.8529e-01, -6.1033e-01, -1.0000e+09, -1.3764e-01,
        -2.8116e-01, -1.0000e+09, -1.0184e-02, -3.7285e-01, -1.0000e+09,
        -1.0184e-02, -3.7285e-01, -1.0000e+09, -1.0184e-02, -3.7285e-01,
        -1.0000e+09, -1.0184e-02, -3.7285e-01, -1.0000e+09, -1.0184e-02,
        -3.7285e-01, -1.0000e+09, -1.0184e-02, -3.7285e-01, -1.0000e+09,
        -1.0184e-02, -3.7285e-01, -1.0000e+09, -1.0184e-02, -3.7285e-01,
        -1.0000e+09, -1.0184e-02

Tokens embeddings:
tensor([-1.0233e-02, -4.3960e-01, -1.0000e+09, -3.5217e-01, -3.7311e-01,
        -1.0000e+09, -2.3131e-01, -4.2868e-01, -1.0000e+09, -3.5217e-01,
        -2.3249e-01, -1.0000e+09, -3.5217e-01, -1.3333e-01, -1.0000e+09,
        -3.2490e-01, -4.3563e-01, -1.0000e+09, -2.9826e-01, -2.2176e-01,
        -1.0000e+09, -2.8190e-01, -2.8394e-01, -1.0000e+09, -1.6137e-01,
        -4.4856e-01, -1.0000e+09, -2.7131e-01, -4.5053e-01, -1.0000e+09,
        -1.6481e-01, -4.2241e-01, -1.0000e+09, -2.8151e-01, -4.3667e-01,
        -1.0000e+09, -1.8529e-01, -6.1033e-01, -1.0000e+09, -1.3764e-01,
        -2.8116e-01, -1.0000e+09, -1.0184e-02, -3.7285e-01, -1.0000e+09,
        -1.0184e-02, -3.7285e-01, -1.0000e+09, -1.0184e-02, -3.7285e-01,
        -1.0000e+09, -1.0184e-02, -3.7285e-01, -1.0000e+09, -1.0184e-02,
        -3.7285e-01, -1.0000e+09, -1.0184e-02, -3.7285e-01, -1.0000e+09,
        -1.0184e-02, -3.7285e-01, -1.0000e+09, -1.0184e-02, -3.7285e-01,
        -1.0000e+09, -1.0184e-02

Tokens embeddings:
tensor([-1.0233e-02, -4.3960e-01, -1.0000e+09, -3.5217e-01, -3.7311e-01,
        -1.0000e+09, -2.3131e-01, -4.2868e-01, -1.0000e+09, -3.5217e-01,
        -2.3249e-01, -1.0000e+09, -3.5217e-01, -1.3333e-01, -1.0000e+09,
        -3.2490e-01, -4.3563e-01, -1.0000e+09, -2.9826e-01, -2.2176e-01,
        -1.0000e+09, -2.8190e-01, -2.8394e-01, -1.0000e+09, -1.6137e-01,
        -4.4856e-01, -1.0000e+09, -2.7131e-01, -4.5053e-01, -1.0000e+09,
        -1.6481e-01, -4.2241e-01, -1.0000e+09, -2.8151e-01, -4.3667e-01,
        -1.0000e+09, -1.8529e-01, -6.1033e-01, -1.0000e+09, -1.3764e-01,
        -2.8116e-01, -1.0000e+09, -1.0184e-02, -3.7285e-01, -1.0000e+09,
        -1.0184e-02, -3.7285e-01, -1.0000e+09, -1.0184e-02, -3.7285e-01,
        -1.0000e+09, -1.0184e-02, -3.7285e-01, -1.0000e+09, -1.0184e-02,
        -3.7285e-01, -1.0000e+09, -1.0184e-02, -3.7285e-01, -1.0000e+09,
        -1.0184e-02, -3.7285e-01, -1.0000e+09, -1.0184e-02, -3.7285e-01,
        -1.0000e+09, -1.0184e-02

Tokens embeddings:
tensor([-1.0233e-02, -4.3960e-01, -1.0000e+09, -3.5217e-01, -3.7311e-01,
        -1.0000e+09, -2.3131e-01, -4.2868e-01, -1.0000e+09, -3.5217e-01,
        -2.3249e-01, -1.0000e+09, -3.5217e-01, -1.3333e-01, -1.0000e+09,
        -3.2490e-01, -4.3563e-01, -1.0000e+09, -2.9826e-01, -2.2176e-01,
        -1.0000e+09, -2.8190e-01, -2.8394e-01, -1.0000e+09, -1.6137e-01,
        -4.4856e-01, -1.0000e+09, -2.7131e-01, -4.5053e-01, -1.0000e+09,
        -1.6481e-01, -4.2241e-01, -1.0000e+09, -2.8151e-01, -4.3667e-01,
        -1.0000e+09, -1.8529e-01, -6.1033e-01, -1.0000e+09, -1.3764e-01,
        -2.8116e-01, -1.0000e+09, -1.0184e-02, -3.7285e-01, -1.0000e+09,
        -1.0184e-02, -3.7285e-01, -1.0000e+09, -1.0184e-02, -3.7285e-01,
        -1.0000e+09, -1.0184e-02, -3.7285e-01, -1.0000e+09, -1.0184e-02,
        -3.7285e-01, -1.0000e+09, -1.0184e-02, -3.7285e-01, -1.0000e+09,
        -1.0184e-02, -3.7285e-01, -1.0000e+09, -1.0184e-02, -3.7285e-01,
        -1.0000e+09, -1.0184e-02

Tokens embeddings:
tensor([-1.0233e-02, -4.3960e-01, -1.0000e+09, -3.5217e-01, -3.7311e-01,
        -1.0000e+09, -2.3131e-01, -4.2868e-01, -1.0000e+09, -3.5217e-01,
        -2.3249e-01, -1.0000e+09, -3.5217e-01, -1.3333e-01, -1.0000e+09,
        -3.2490e-01, -4.3563e-01, -1.0000e+09, -2.9826e-01, -2.2176e-01,
        -1.0000e+09, -2.8190e-01, -2.8394e-01, -1.0000e+09, -1.6137e-01,
        -4.4856e-01, -1.0000e+09, -2.7131e-01, -4.5053e-01, -1.0000e+09,
        -1.6481e-01, -4.2241e-01, -1.0000e+09, -2.8151e-01, -4.3667e-01,
        -1.0000e+09, -1.8529e-01, -6.1033e-01, -1.0000e+09, -1.3764e-01,
        -2.8116e-01, -1.0000e+09, -1.0184e-02, -3.7285e-01, -1.0000e+09,
        -1.0184e-02, -3.7285e-01, -1.0000e+09, -1.0184e-02, -3.7285e-01,
        -1.0000e+09, -1.0184e-02, -3.7285e-01, -1.0000e+09, -1.0184e-02,
        -3.7285e-01, -1.0000e+09, -1.0184e-02, -3.7285e-01, -1.0000e+09,
        -1.0184e-02, -3.7285e-01, -1.0000e+09, -1.0184e-02, -3.7285e-01,
        -1.0000e+09, -1.0184e-02

Tokens embeddings:
tensor([-1.0233e-02, -4.3960e-01, -1.0000e+09, -3.5217e-01, -3.7311e-01,
        -1.0000e+09, -2.3131e-01, -4.2868e-01, -1.0000e+09, -3.5217e-01,
        -2.3249e-01, -1.0000e+09, -3.5217e-01, -1.3333e-01, -1.0000e+09,
        -3.2490e-01, -4.3563e-01, -1.0000e+09, -2.9826e-01, -2.2176e-01,
        -1.0000e+09, -2.8190e-01, -2.8394e-01, -1.0000e+09, -1.6137e-01,
        -4.4856e-01, -1.0000e+09, -2.7131e-01, -4.5053e-01, -1.0000e+09,
        -1.6481e-01, -4.2241e-01, -1.0000e+09, -2.8151e-01, -4.3667e-01,
        -1.0000e+09, -1.8529e-01, -6.1033e-01, -1.0000e+09, -1.3764e-01,
        -2.8116e-01, -1.0000e+09, -1.0184e-02, -3.7285e-01, -1.0000e+09,
        -1.0184e-02, -3.7285e-01, -1.0000e+09, -1.0184e-02, -3.7285e-01,
        -1.0000e+09, -1.0184e-02, -3.7285e-01, -1.0000e+09, -1.0184e-02,
        -3.7285e-01, -1.0000e+09, -1.0184e-02, -3.7285e-01, -1.0000e+09,
        -1.0184e-02, -3.7285e-01, -1.0000e+09, -1.0184e-02, -3.7285e-01,
        -1.0000e+09, -1.0184e-02

Tokens embeddings:
tensor([-1.0233e-02, -4.1655e-01, -1.0000e+09,  4.4263e-02, -3.4852e-01,
        -1.0000e+09, -2.2812e-01, -2.0761e-01, -1.0000e+09, -9.6456e-02,
        -3.9798e-01, -1.0000e+09, -2.1542e-01, -4.0058e-01, -1.0000e+09,
        -3.3394e-01, -2.5969e-01, -3.7607e-01, -2.0009e-01, -1.6590e-01,
        -4.2241e-01, -1.2793e-01, -3.8420e-01, -1.0000e+09, -1.6655e-01,
        -5.1955e-01, -1.0000e+09, -3.1514e-01, -4.3088e-01, -1.0000e+09,
        -1.9744e-01, -2.8542e-01, -1.0000e+09, -2.8874e-01, -4.1133e-01,
        -1.0000e+09,  4.4263e-02, -4.5728e-01, -1.0000e+09, -9.0318e-02,
        -3.2918e-01, -1.0000e+09, -5.9544e-02, -2.0761e-01, -1.0000e+09,
        -1.6451e-01, -3.2274e-01, -1.0000e+09, -1.0184e-02, -3.7285e-01,
        -1.0000e+09, -1.0184e-02, -3.7285e-01, -1.0000e+09, -1.0184e-02,
        -3.7285e-01, -1.0000e+09, -1.0184e-02, -3.7285e-01, -1.0000e+09,
        -1.0184e-02, -3.7285e-01, -1.0000e+09, -1.0184e-02, -3.7285e-01,
        -1.0000e+09, -1.0184e-02

Tokens embeddings:
tensor([-1.0233e-02, -4.1655e-01, -1.0000e+09,  4.4263e-02, -3.4852e-01,
        -1.0000e+09, -2.2812e-01, -2.0761e-01, -1.0000e+09, -9.6456e-02,
        -3.9798e-01, -1.0000e+09, -2.1542e-01, -4.0058e-01, -1.0000e+09,
        -3.3394e-01, -2.5969e-01, -3.7607e-01, -2.0009e-01, -1.6590e-01,
        -4.2241e-01, -1.2793e-01, -3.8420e-01, -1.0000e+09, -1.6655e-01,
        -5.1955e-01, -1.0000e+09, -3.1514e-01, -4.3088e-01, -1.0000e+09,
        -1.9744e-01, -2.8542e-01, -1.0000e+09, -2.8874e-01, -4.1133e-01,
        -1.0000e+09,  4.4263e-02, -4.5728e-01, -1.0000e+09, -9.0318e-02,
        -3.2918e-01, -1.0000e+09, -5.9544e-02, -2.0761e-01, -1.0000e+09,
        -1.6451e-01, -3.2274e-01, -1.0000e+09, -1.0184e-02, -3.7285e-01,
        -1.0000e+09, -1.0184e-02, -3.7285e-01, -1.0000e+09, -1.0184e-02,
        -3.7285e-01, -1.0000e+09, -1.0184e-02, -3.7285e-01, -1.0000e+09,
        -1.0184e-02, -3.7285e-01, -1.0000e+09, -1.0184e-02, -3.7285e-01,
        -1.0000e+09, -1.0184e-02

Tokens embeddings:
tensor([-1.0233e-02, -4.1655e-01, -1.0000e+09,  4.4263e-02, -3.4852e-01,
        -1.0000e+09, -2.2812e-01, -2.0761e-01, -1.0000e+09, -9.6456e-02,
        -3.9798e-01, -1.0000e+09, -2.1542e-01, -4.0058e-01, -1.0000e+09,
        -3.3394e-01, -2.5969e-01, -3.7607e-01, -2.0009e-01, -1.6590e-01,
        -4.2241e-01, -1.2793e-01, -3.8420e-01, -1.0000e+09, -1.6655e-01,
        -5.1955e-01, -1.0000e+09, -3.1514e-01, -4.3088e-01, -1.0000e+09,
        -1.9744e-01, -2.8542e-01, -1.0000e+09, -2.8874e-01, -4.1133e-01,
        -1.0000e+09,  4.4263e-02, -4.5728e-01, -1.0000e+09, -9.0318e-02,
        -3.2918e-01, -1.0000e+09, -5.9544e-02, -2.0761e-01, -1.0000e+09,
        -1.6451e-01, -3.2274e-01, -1.0000e+09, -1.0184e-02, -3.7285e-01,
        -1.0000e+09, -1.0184e-02, -3.7285e-01, -1.0000e+09, -1.0184e-02,
        -3.7285e-01, -1.0000e+09, -1.0184e-02, -3.7285e-01, -1.0000e+09,
        -1.0184e-02, -3.7285e-01, -1.0000e+09, -1.0184e-02, -3.7285e-01,
        -1.0000e+09, -1.0184e-02

Tokens embeddings:
tensor([-1.0233e-02, -4.1655e-01, -1.0000e+09,  4.4263e-02, -3.4852e-01,
        -1.0000e+09, -2.2812e-01, -2.0761e-01, -1.0000e+09, -9.6456e-02,
        -3.9798e-01, -1.0000e+09, -2.1542e-01, -4.0058e-01, -1.0000e+09,
        -3.3394e-01, -2.5969e-01, -3.7607e-01, -2.0009e-01, -1.6590e-01,
        -4.2241e-01, -1.2793e-01, -3.8420e-01, -1.0000e+09, -1.6655e-01,
        -5.1955e-01, -1.0000e+09, -3.1514e-01, -4.3088e-01, -1.0000e+09,
        -1.9744e-01, -2.8542e-01, -1.0000e+09, -2.8874e-01, -4.1133e-01,
        -1.0000e+09,  4.4263e-02, -4.5728e-01, -1.0000e+09, -9.0318e-02,
        -3.2918e-01, -1.0000e+09, -5.9544e-02, -2.0761e-01, -1.0000e+09,
        -1.6451e-01, -3.2274e-01, -1.0000e+09, -1.0184e-02, -3.7285e-01,
        -1.0000e+09, -1.0184e-02, -3.7285e-01, -1.0000e+09, -1.0184e-02,
        -3.7285e-01, -1.0000e+09, -1.0184e-02, -3.7285e-01, -1.0000e+09,
        -1.0184e-02, -3.7285e-01, -1.0000e+09, -1.0184e-02, -3.7285e-01,
        -1.0000e+09, -1.0184e-02

Tokens embeddings:
tensor([-1.0233e-02, -4.1655e-01, -1.0000e+09,  4.4263e-02, -3.4852e-01,
        -1.0000e+09, -2.2812e-01, -2.0761e-01, -1.0000e+09, -9.6456e-02,
        -3.9798e-01, -1.0000e+09, -2.1542e-01, -4.0058e-01, -1.0000e+09,
        -3.3394e-01, -2.5969e-01, -3.7607e-01, -2.0009e-01, -1.6590e-01,
        -4.2241e-01, -1.2793e-01, -3.8420e-01, -1.0000e+09, -1.6655e-01,
        -5.1955e-01, -1.0000e+09, -3.1514e-01, -4.3088e-01, -1.0000e+09,
        -1.9744e-01, -2.8542e-01, -1.0000e+09, -2.8874e-01, -4.1133e-01,
        -1.0000e+09,  4.4263e-02, -4.5728e-01, -1.0000e+09, -9.0318e-02,
        -3.2918e-01, -1.0000e+09, -5.9544e-02, -2.0761e-01, -1.0000e+09,
        -1.6451e-01, -3.2274e-01, -1.0000e+09, -1.0184e-02, -3.7285e-01,
        -1.0000e+09, -1.0184e-02, -3.7285e-01, -1.0000e+09, -1.0184e-02,
        -3.7285e-01, -1.0000e+09, -1.0184e-02, -3.7285e-01, -1.0000e+09,
        -1.0184e-02, -3.7285e-01, -1.0000e+09, -1.0184e-02, -3.7285e-01,
        -1.0000e+09, -1.0184e-02

Tokens embeddings:
tensor([-1.0233e-02, -4.1655e-01, -1.0000e+09,  4.4263e-02, -3.4852e-01,
        -1.0000e+09, -2.2812e-01, -2.0761e-01, -1.0000e+09, -9.6456e-02,
        -3.9798e-01, -1.0000e+09, -2.1542e-01, -4.0058e-01, -1.0000e+09,
        -3.3394e-01, -2.5969e-01, -3.7607e-01, -2.0009e-01, -1.6590e-01,
        -4.2241e-01, -1.2793e-01, -3.8420e-01, -1.0000e+09, -1.6655e-01,
        -5.1955e-01, -1.0000e+09, -3.1514e-01, -4.3088e-01, -1.0000e+09,
        -1.9744e-01, -2.8542e-01, -1.0000e+09, -2.8874e-01, -4.1133e-01,
        -1.0000e+09,  4.4263e-02, -4.5728e-01, -1.0000e+09, -9.0318e-02,
        -3.2918e-01, -1.0000e+09, -5.9544e-02, -2.0761e-01, -1.0000e+09,
        -1.6451e-01, -3.2274e-01, -1.0000e+09, -1.0184e-02, -3.7285e-01,
        -1.0000e+09, -1.0184e-02, -3.7285e-01, -1.0000e+09, -1.0184e-02,
        -3.7285e-01, -1.0000e+09, -1.0184e-02, -3.7285e-01, -1.0000e+09,
        -1.0184e-02, -3.7285e-01, -1.0000e+09, -1.0184e-02, -3.7285e-01,
        -1.0000e+09, -1.0184e-02

Tokens embeddings:
tensor([-1.0233e-02, -4.1655e-01, -1.0000e+09,  4.4263e-02, -3.4852e-01,
        -1.0000e+09, -2.2812e-01, -2.0761e-01, -1.0000e+09, -9.6456e-02,
        -3.9798e-01, -1.0000e+09, -2.1542e-01, -4.0058e-01, -1.0000e+09,
        -3.3394e-01, -2.5969e-01, -3.7607e-01, -2.0009e-01, -1.6590e-01,
        -4.2241e-01, -1.2793e-01, -3.8420e-01, -1.0000e+09, -1.6655e-01,
        -5.1955e-01, -1.0000e+09, -3.1514e-01, -4.3088e-01, -1.0000e+09,
        -1.9744e-01, -2.8542e-01, -1.0000e+09, -2.8874e-01, -4.1133e-01,
        -1.0000e+09,  4.4263e-02, -4.5728e-01, -1.0000e+09, -9.0318e-02,
        -3.2918e-01, -1.0000e+09, -5.9544e-02, -2.0761e-01, -1.0000e+09,
        -1.6451e-01, -3.2274e-01, -1.0000e+09, -1.0184e-02, -3.7285e-01,
        -1.0000e+09, -1.0184e-02, -3.7285e-01, -1.0000e+09, -1.0184e-02,
        -3.7285e-01, -1.0000e+09, -1.0184e-02, -3.7285e-01, -1.0000e+09,
        -1.0184e-02, -3.7285e-01, -1.0000e+09, -1.0184e-02, -3.7285e-01,
        -1.0000e+09, -1.0184e-02

Tokens embeddings:
tensor([-1.0233e-02, -4.1655e-01, -1.0000e+09,  4.4263e-02, -3.4852e-01,
        -1.0000e+09, -2.2812e-01, -2.0761e-01, -1.0000e+09, -9.6456e-02,
        -3.9798e-01, -1.0000e+09, -2.1542e-01, -4.0058e-01, -1.0000e+09,
        -3.3394e-01, -2.5969e-01, -3.7607e-01, -2.0009e-01, -1.6590e-01,
        -4.2241e-01, -1.2793e-01, -3.8420e-01, -1.0000e+09, -1.6655e-01,
        -5.1955e-01, -1.0000e+09, -3.1514e-01, -4.3088e-01, -1.0000e+09,
        -1.9744e-01, -2.8542e-01, -1.0000e+09, -2.8874e-01, -4.1133e-01,
        -1.0000e+09,  4.4263e-02, -4.5728e-01, -1.0000e+09, -9.0318e-02,
        -3.2918e-01, -1.0000e+09, -5.9544e-02, -2.0761e-01, -1.0000e+09,
        -1.6451e-01, -3.2274e-01, -1.0000e+09, -1.0184e-02, -3.7285e-01,
        -1.0000e+09, -1.0184e-02, -3.7285e-01, -1.0000e+09, -1.0184e-02,
        -3.7285e-01, -1.0000e+09, -1.0184e-02, -3.7285e-01, -1.0000e+09,
        -1.0184e-02, -3.7285e-01, -1.0000e+09, -1.0184e-02, -3.7285e-01,
        -1.0000e+09, -1.0184e-02

Tokens embeddings:
tensor([-1.0233e-02, -4.1655e-01, -1.0000e+09,  4.4263e-02, -3.4852e-01,
        -1.0000e+09, -2.2812e-01, -2.0761e-01, -1.0000e+09, -9.6456e-02,
        -3.9798e-01, -1.0000e+09, -2.1542e-01, -4.0058e-01, -1.0000e+09,
        -3.3394e-01, -2.5969e-01, -3.7607e-01, -2.0009e-01, -1.6590e-01,
        -4.2241e-01, -1.2793e-01, -3.8420e-01, -1.0000e+09, -1.6655e-01,
        -5.1955e-01, -1.0000e+09, -3.1514e-01, -4.3088e-01, -1.0000e+09,
        -1.9744e-01, -2.8542e-01, -1.0000e+09, -2.8874e-01, -4.1133e-01,
        -1.0000e+09,  4.4263e-02, -4.5728e-01, -1.0000e+09, -9.0318e-02,
        -3.2918e-01, -1.0000e+09, -5.9544e-02, -2.0761e-01, -1.0000e+09,
        -1.6451e-01, -3.2274e-01, -1.0000e+09, -1.0184e-02, -3.7285e-01,
        -1.0000e+09, -1.0184e-02, -3.7285e-01, -1.0000e+09, -1.0184e-02,
        -3.7285e-01, -1.0000e+09, -1.0184e-02, -3.7285e-01, -1.0000e+09,
        -1.0184e-02, -3.7285e-01, -1.0000e+09, -1.0184e-02, -3.7285e-01,
        -1.0000e+09, -1.0184e-02

Tokens embeddings:
tensor([-1.0233e-02, -4.1655e-01, -1.0000e+09,  4.4263e-02, -3.4852e-01,
        -1.0000e+09, -2.2812e-01, -2.0761e-01, -1.0000e+09, -9.6456e-02,
        -3.9798e-01, -1.0000e+09, -2.1542e-01, -4.0058e-01, -1.0000e+09,
        -3.3394e-01, -2.5969e-01, -3.7607e-01, -2.0009e-01, -1.6590e-01,
        -4.2241e-01, -1.2793e-01, -3.8420e-01, -1.0000e+09, -1.6655e-01,
        -5.1955e-01, -1.0000e+09, -3.1514e-01, -4.3088e-01, -1.0000e+09,
        -1.9744e-01, -2.8542e-01, -1.0000e+09, -2.8874e-01, -4.1133e-01,
        -1.0000e+09,  4.4263e-02, -4.5728e-01, -1.0000e+09, -9.0318e-02,
        -3.2918e-01, -1.0000e+09, -5.9544e-02, -2.0761e-01, -1.0000e+09,
        -1.6451e-01, -3.2274e-01, -1.0000e+09, -1.0184e-02, -3.7285e-01,
        -1.0000e+09, -1.0184e-02, -3.7285e-01, -1.0000e+09, -1.0184e-02,
        -3.7285e-01, -1.0000e+09, -1.0184e-02, -3.7285e-01, -1.0000e+09,
        -1.0184e-02, -3.7285e-01, -1.0000e+09, -1.0184e-02, -3.7285e-01,
        -1.0000e+09, -1.0184e-02

Tokens embeddings:
tensor([-1.0233e-02, -4.1655e-01, -1.0000e+09,  4.4263e-02, -3.4852e-01,
        -1.0000e+09, -2.2812e-01, -2.0761e-01, -1.0000e+09, -9.6456e-02,
        -3.9798e-01, -1.0000e+09, -2.1542e-01, -4.0058e-01, -1.0000e+09,
        -3.3394e-01, -2.5969e-01, -3.7607e-01, -2.0009e-01, -1.6590e-01,
        -4.2241e-01, -1.2793e-01, -3.8420e-01, -1.0000e+09, -1.6655e-01,
        -5.1955e-01, -1.0000e+09, -3.1514e-01, -4.3088e-01, -1.0000e+09,
        -1.9744e-01, -2.8542e-01, -1.0000e+09, -2.8874e-01, -4.1133e-01,
        -1.0000e+09,  4.4263e-02, -4.5728e-01, -1.0000e+09, -9.0318e-02,
        -3.2918e-01, -1.0000e+09, -5.9544e-02, -2.0761e-01, -1.0000e+09,
        -1.6451e-01, -3.2274e-01, -1.0000e+09, -1.0184e-02, -3.7285e-01,
        -1.0000e+09, -1.0184e-02, -3.7285e-01, -1.0000e+09, -1.0184e-02,
        -3.7285e-01, -1.0000e+09, -1.0184e-02, -3.7285e-01, -1.0000e+09,
        -1.0184e-02, -3.7285e-01, -1.0000e+09, -1.0184e-02, -3.7285e-01,
        -1.0000e+09, -1.0184e-02

Tokens embeddings:
tensor([-1.0233e-02, -4.1655e-01, -1.0000e+09,  4.4263e-02, -3.4852e-01,
        -1.0000e+09, -2.2812e-01, -2.0761e-01, -1.0000e+09, -9.6456e-02,
        -3.9798e-01, -1.0000e+09, -2.1542e-01, -4.0058e-01, -1.0000e+09,
        -3.3394e-01, -2.5969e-01, -3.7607e-01, -2.0009e-01, -1.6590e-01,
        -4.2241e-01, -1.2793e-01, -3.8420e-01, -1.0000e+09, -1.6655e-01,
        -5.1955e-01, -1.0000e+09, -3.1514e-01, -4.3088e-01, -1.0000e+09,
        -1.9744e-01, -2.8542e-01, -1.0000e+09, -2.8874e-01, -4.1133e-01,
        -1.0000e+09,  4.4263e-02, -4.5728e-01, -1.0000e+09, -9.0318e-02,
        -3.2918e-01, -1.0000e+09, -5.9544e-02, -2.0761e-01, -1.0000e+09,
        -1.6451e-01, -3.2274e-01, -1.0000e+09, -1.0184e-02, -3.7285e-01,
        -1.0000e+09, -1.0184e-02, -3.7285e-01, -1.0000e+09, -1.0184e-02,
        -3.7285e-01, -1.0000e+09, -1.0184e-02, -3.7285e-01, -1.0000e+09,
        -1.0184e-02, -3.7285e-01, -1.0000e+09, -1.0184e-02, -3.7285e-01,
        -1.0000e+09, -1.0184e-02

Tokens embeddings:
tensor([-1.0233e-02, -3.7905e-01, -1.0000e+09, -2.0938e-01, -2.1065e-01,
        -1.0000e+09, -2.7530e-01, -4.2241e-01, -1.0000e+09, -3.1738e-01,
        -3.8420e-01, -1.0000e+09, -9.0318e-02, -3.8022e-01, -4.7442e-01,
        -2.9768e-01, -2.8347e-01, -1.0000e+09, -2.8498e-01, -4.5498e-01,
        -1.0000e+09, -1.5565e-01, -4.2691e-01, -1.0000e+09, -3.5217e-01,
        -3.4294e-01, -1.0000e+09, -1.1230e-01, -4.2169e-01, -1.0000e+09,
        -3.5217e-01, -4.8456e-01, -1.0000e+09, -2.8151e-01, -2.9321e-01,
        -1.0000e+09, -3.5217e-01, -4.8456e-01, -1.0000e+09, -2.8398e-01,
        -3.6857e-01, -1.0000e+09, -1.0184e-02, -3.7285e-01, -1.0000e+09,
        -1.0184e-02, -3.7285e-01, -1.0000e+09, -1.0184e-02, -1.0184e-02,
        -1.0184e-02, -1.0184e-02, -3.7285e-01, -1.0000e+09, -1.0184e-02,
        -3.7285e-01, -1.0000e+09, -1.0184e-02, -3.7285e-01, -1.0000e+09,
        -1.0184e-02, -3.7285e-01, -1.0000e+09, -1.0184e-02, -3.7285e-01,
        -1.0000e+09, -1.0184e-02

Tokens embeddings:
tensor([-1.0233e-02, -3.7905e-01, -1.0000e+09, -2.0938e-01, -2.1065e-01,
        -1.0000e+09, -2.7530e-01, -4.2241e-01, -1.0000e+09, -3.1738e-01,
        -3.8420e-01, -1.0000e+09, -9.0318e-02, -3.8022e-01, -4.7442e-01,
        -2.9768e-01, -2.8347e-01, -1.0000e+09, -2.8498e-01, -4.5498e-01,
        -1.0000e+09, -1.5565e-01, -4.2691e-01, -1.0000e+09, -3.5217e-01,
        -3.4294e-01, -1.0000e+09, -1.1230e-01, -4.2169e-01, -1.0000e+09,
        -3.5217e-01, -4.8456e-01, -1.0000e+09, -2.8151e-01, -2.9321e-01,
        -1.0000e+09, -3.5217e-01, -4.8456e-01, -1.0000e+09, -2.8398e-01,
        -3.6857e-01, -1.0000e+09, -1.0184e-02, -3.7285e-01, -1.0000e+09,
        -1.0184e-02, -3.7285e-01, -1.0000e+09, -1.0184e-02, -1.0184e-02,
        -1.0184e-02, -1.0184e-02, -3.7285e-01, -1.0000e+09, -1.0184e-02,
        -3.7285e-01, -1.0000e+09, -1.0184e-02, -3.7285e-01, -1.0000e+09,
        -1.0184e-02, -3.7285e-01, -1.0000e+09, -1.0184e-02, -3.7285e-01,
        -1.0000e+09, -1.0184e-02

Tokens embeddings:
tensor([-1.0233e-02, -3.7905e-01, -1.0000e+09, -2.0938e-01, -2.1065e-01,
        -1.0000e+09, -2.7530e-01, -4.2241e-01, -1.0000e+09, -3.1738e-01,
        -3.8420e-01, -1.0000e+09, -9.0318e-02, -3.8022e-01, -4.7442e-01,
        -2.9768e-01, -2.8347e-01, -1.0000e+09, -2.8498e-01, -4.5498e-01,
        -1.0000e+09, -1.5565e-01, -4.2691e-01, -1.0000e+09, -3.5217e-01,
        -3.4294e-01, -1.0000e+09, -1.1230e-01, -4.2169e-01, -1.0000e+09,
        -3.5217e-01, -4.8456e-01, -1.0000e+09, -2.8151e-01, -2.9321e-01,
        -1.0000e+09, -3.5217e-01, -4.8456e-01, -1.0000e+09, -2.8398e-01,
        -3.6857e-01, -1.0000e+09, -1.0184e-02, -3.7285e-01, -1.0000e+09,
        -1.0184e-02, -3.7285e-01, -1.0000e+09, -1.0184e-02, -1.0184e-02,
        -1.0184e-02, -1.0184e-02, -3.7285e-01, -1.0000e+09, -1.0184e-02,
        -3.7285e-01, -1.0000e+09, -1.0184e-02, -3.7285e-01, -1.0000e+09,
        -1.0184e-02, -3.7285e-01, -1.0000e+09, -1.0184e-02, -3.7285e-01,
        -1.0000e+09, -1.0184e-02

Tokens embeddings:
tensor([-1.0233e-02, -3.7905e-01, -1.0000e+09, -2.0938e-01, -2.1065e-01,
        -1.0000e+09, -2.7530e-01, -4.2241e-01, -1.0000e+09, -3.1738e-01,
        -3.8420e-01, -1.0000e+09, -9.0318e-02, -3.8022e-01, -4.7442e-01,
        -2.9768e-01, -2.8347e-01, -1.0000e+09, -2.8498e-01, -4.5498e-01,
        -1.0000e+09, -1.5565e-01, -4.2691e-01, -1.0000e+09, -3.5217e-01,
        -3.4294e-01, -1.0000e+09, -1.1230e-01, -4.2169e-01, -1.0000e+09,
        -3.5217e-01, -4.8456e-01, -1.0000e+09, -2.8151e-01, -2.9321e-01,
        -1.0000e+09, -3.5217e-01, -4.8456e-01, -1.0000e+09, -2.8398e-01,
        -3.6857e-01, -1.0000e+09, -1.0184e-02, -3.7285e-01, -1.0000e+09,
        -1.0184e-02, -3.7285e-01, -1.0000e+09, -1.0184e-02, -1.0184e-02,
        -1.0184e-02, -1.0184e-02, -3.7285e-01, -1.0000e+09, -1.0184e-02,
        -3.7285e-01, -1.0000e+09, -1.0184e-02, -3.7285e-01, -1.0000e+09,
        -1.0184e-02, -3.7285e-01, -1.0000e+09, -1.0184e-02, -3.7285e-01,
        -1.0000e+09, -1.0184e-02

Tokens embeddings:
tensor([-1.0233e-02, -3.7905e-01, -1.0000e+09, -2.0938e-01, -2.1065e-01,
        -1.0000e+09, -2.7530e-01, -4.2241e-01, -1.0000e+09, -3.1738e-01,
        -3.8420e-01, -1.0000e+09, -9.0318e-02, -3.8022e-01, -4.7442e-01,
        -2.9768e-01, -2.8347e-01, -1.0000e+09, -2.8498e-01, -4.5498e-01,
        -1.0000e+09, -1.5565e-01, -4.2691e-01, -1.0000e+09, -3.5217e-01,
        -3.4294e-01, -1.0000e+09, -1.1230e-01, -4.2169e-01, -1.0000e+09,
        -3.5217e-01, -4.8456e-01, -1.0000e+09, -2.8151e-01, -2.9321e-01,
        -1.0000e+09, -3.5217e-01, -4.8456e-01, -1.0000e+09, -2.8398e-01,
        -3.6857e-01, -1.0000e+09, -1.0184e-02, -3.7285e-01, -1.0000e+09,
        -1.0184e-02, -3.7285e-01, -1.0000e+09, -1.0184e-02, -1.0184e-02,
        -1.0184e-02, -1.0184e-02, -3.7285e-01, -1.0000e+09, -1.0184e-02,
        -3.7285e-01, -1.0000e+09, -1.0184e-02, -3.7285e-01, -1.0000e+09,
        -1.0184e-02, -3.7285e-01, -1.0000e+09, -1.0184e-02, -3.7285e-01,
        -1.0000e+09, -1.0184e-02

Tokens embeddings:
tensor([-1.0233e-02, -3.7905e-01, -1.0000e+09, -2.0938e-01, -2.1065e-01,
        -1.0000e+09, -2.7530e-01, -4.2241e-01, -1.0000e+09, -3.1738e-01,
        -3.8420e-01, -1.0000e+09, -9.0318e-02, -3.8022e-01, -4.7442e-01,
        -2.9768e-01, -2.8347e-01, -1.0000e+09, -2.8498e-01, -4.5498e-01,
        -1.0000e+09, -1.5565e-01, -4.2691e-01, -1.0000e+09, -3.5217e-01,
        -3.4294e-01, -1.0000e+09, -1.1230e-01, -4.2169e-01, -1.0000e+09,
        -3.5217e-01, -4.8456e-01, -1.0000e+09, -2.8151e-01, -2.9321e-01,
        -1.0000e+09, -3.5217e-01, -4.8456e-01, -1.0000e+09, -2.8398e-01,
        -3.6857e-01, -1.0000e+09, -1.0184e-02, -3.7285e-01, -1.0000e+09,
        -1.0184e-02, -3.7285e-01, -1.0000e+09, -1.0184e-02, -1.0184e-02,
        -1.0184e-02, -1.0184e-02, -3.7285e-01, -1.0000e+09, -1.0184e-02,
        -3.7285e-01, -1.0000e+09, -1.0184e-02, -3.7285e-01, -1.0000e+09,
        -1.0184e-02, -3.7285e-01, -1.0000e+09, -1.0184e-02, -3.7285e-01,
        -1.0000e+09, -1.0184e-02

Tokens embeddings:
tensor([-1.0233e-02, -3.7905e-01, -1.0000e+09, -2.0938e-01, -2.1065e-01,
        -1.0000e+09, -2.7530e-01, -4.2241e-01, -1.0000e+09, -3.1738e-01,
        -3.8420e-01, -1.0000e+09, -9.0318e-02, -3.8022e-01, -4.7442e-01,
        -2.9768e-01, -2.8347e-01, -1.0000e+09, -2.8498e-01, -4.5498e-01,
        -1.0000e+09, -1.5565e-01, -4.2691e-01, -1.0000e+09, -3.5217e-01,
        -3.4294e-01, -1.0000e+09, -1.1230e-01, -4.2169e-01, -1.0000e+09,
        -3.5217e-01, -4.8456e-01, -1.0000e+09, -2.8151e-01, -2.9321e-01,
        -1.0000e+09, -3.5217e-01, -4.8456e-01, -1.0000e+09, -2.8398e-01,
        -3.6857e-01, -1.0000e+09, -1.0184e-02, -3.7285e-01, -1.0000e+09,
        -1.0184e-02, -3.7285e-01, -1.0000e+09, -1.0184e-02, -1.0184e-02,
        -1.0184e-02, -1.0184e-02, -3.7285e-01, -1.0000e+09, -1.0184e-02,
        -3.7285e-01, -1.0000e+09, -1.0184e-02, -3.7285e-01, -1.0000e+09,
        -1.0184e-02, -3.7285e-01, -1.0000e+09, -1.0184e-02, -3.7285e-01,
        -1.0000e+09, -1.0184e-02

Tokens embeddings:
tensor([-1.0233e-02, -3.7905e-01, -1.0000e+09, -2.0938e-01, -2.1065e-01,
        -1.0000e+09, -2.7530e-01, -4.2241e-01, -1.0000e+09, -3.1738e-01,
        -3.8420e-01, -1.0000e+09, -9.0318e-02, -3.8022e-01, -4.7442e-01,
        -2.9768e-01, -2.8347e-01, -1.0000e+09, -2.8498e-01, -4.5498e-01,
        -1.0000e+09, -1.5565e-01, -4.2691e-01, -1.0000e+09, -3.5217e-01,
        -3.4294e-01, -1.0000e+09, -1.1230e-01, -4.2169e-01, -1.0000e+09,
        -3.5217e-01, -4.8456e-01, -1.0000e+09, -2.8151e-01, -2.9321e-01,
        -1.0000e+09, -3.5217e-01, -4.8456e-01, -1.0000e+09, -2.8398e-01,
        -3.6857e-01, -1.0000e+09, -1.0184e-02, -3.7285e-01, -1.0000e+09,
        -1.0184e-02, -3.7285e-01, -1.0000e+09, -1.0184e-02, -1.0184e-02,
        -1.0184e-02, -1.0184e-02, -3.7285e-01, -1.0000e+09, -1.0184e-02,
        -3.7285e-01, -1.0000e+09, -1.0184e-02, -3.7285e-01, -1.0000e+09,
        -1.0184e-02, -3.7285e-01, -1.0000e+09, -1.0184e-02, -3.7285e-01,
        -1.0000e+09, -1.0184e-02

Tokens embeddings:
tensor([-1.0233e-02, -3.7905e-01, -1.0000e+09, -2.0938e-01, -2.1065e-01,
        -1.0000e+09, -2.7530e-01, -4.2241e-01, -1.0000e+09, -3.1738e-01,
        -3.8420e-01, -1.0000e+09, -9.0318e-02, -3.8022e-01, -4.7442e-01,
        -2.9768e-01, -2.8347e-01, -1.0000e+09, -2.8498e-01, -4.5498e-01,
        -1.0000e+09, -1.5565e-01, -4.2691e-01, -1.0000e+09, -3.5217e-01,
        -3.4294e-01, -1.0000e+09, -1.1230e-01, -4.2169e-01, -1.0000e+09,
        -3.5217e-01, -4.8456e-01, -1.0000e+09, -2.8151e-01, -2.9321e-01,
        -1.0000e+09, -3.5217e-01, -4.8456e-01, -1.0000e+09, -2.8398e-01,
        -3.6857e-01, -1.0000e+09, -1.0184e-02, -3.7285e-01, -1.0000e+09,
        -1.0184e-02, -3.7285e-01, -1.0000e+09, -1.0184e-02, -1.0184e-02,
        -1.0184e-02, -1.0184e-02, -3.7285e-01, -1.0000e+09, -1.0184e-02,
        -3.7285e-01, -1.0000e+09, -1.0184e-02, -3.7285e-01, -1.0000e+09,
        -1.0184e-02, -3.7285e-01, -1.0000e+09, -1.0184e-02, -3.7285e-01,
        -1.0000e+09, -1.0184e-02

Tokens embeddings:
tensor([-1.0233e-02, -3.7905e-01, -1.0000e+09, -2.0938e-01, -2.1065e-01,
        -1.0000e+09, -2.7530e-01, -4.2241e-01, -1.0000e+09, -3.1738e-01,
        -3.8420e-01, -1.0000e+09, -9.0318e-02, -3.8022e-01, -4.7442e-01,
        -2.9768e-01, -2.8347e-01, -1.0000e+09, -2.8498e-01, -4.5498e-01,
        -1.0000e+09, -1.5565e-01, -4.2691e-01, -1.0000e+09, -3.5217e-01,
        -3.4294e-01, -1.0000e+09, -1.1230e-01, -4.2169e-01, -1.0000e+09,
        -3.5217e-01, -4.8456e-01, -1.0000e+09, -2.8151e-01, -2.9321e-01,
        -1.0000e+09, -3.5217e-01, -4.8456e-01, -1.0000e+09, -2.8398e-01,
        -3.6857e-01, -1.0000e+09, -1.0184e-02, -3.7285e-01, -1.0000e+09,
        -1.0184e-02, -3.7285e-01, -1.0000e+09, -1.0184e-02, -1.0184e-02,
        -1.0184e-02, -1.0184e-02, -3.7285e-01, -1.0000e+09, -1.0184e-02,
        -3.7285e-01, -1.0000e+09, -1.0184e-02, -3.7285e-01, -1.0000e+09,
        -1.0184e-02, -3.7285e-01, -1.0000e+09, -1.0184e-02, -3.7285e-01,
        -1.0000e+09, -1.0184e-02

Tokens embeddings:
tensor([-1.0233e-02, -3.7905e-01, -1.0000e+09, -2.0938e-01, -2.1065e-01,
        -1.0000e+09, -2.7530e-01, -4.2241e-01, -1.0000e+09, -3.1738e-01,
        -3.8420e-01, -1.0000e+09, -9.0318e-02, -3.8022e-01, -4.7442e-01,
        -2.9768e-01, -2.8347e-01, -1.0000e+09, -2.8498e-01, -4.5498e-01,
        -1.0000e+09, -1.5565e-01, -4.2691e-01, -1.0000e+09, -3.5217e-01,
        -3.4294e-01, -1.0000e+09, -1.1230e-01, -4.2169e-01, -1.0000e+09,
        -3.5217e-01, -4.8456e-01, -1.0000e+09, -2.8151e-01, -2.9321e-01,
        -1.0000e+09, -3.5217e-01, -4.8456e-01, -1.0000e+09, -2.8398e-01,
        -3.6857e-01, -1.0000e+09, -1.0184e-02, -3.7285e-01, -1.0000e+09,
        -1.0184e-02, -3.7285e-01, -1.0000e+09, -1.0184e-02, -1.0184e-02,
        -1.0184e-02, -1.0184e-02, -3.7285e-01, -1.0000e+09, -1.0184e-02,
        -3.7285e-01, -1.0000e+09, -1.0184e-02, -3.7285e-01, -1.0000e+09,
        -1.0184e-02, -3.7285e-01, -1.0000e+09, -1.0184e-02, -3.7285e-01,
        -1.0000e+09, -1.0184e-02

Tokens embeddings:
tensor([-1.0233e-02, -3.7905e-01, -1.0000e+09, -2.0938e-01, -2.1065e-01,
        -1.0000e+09, -2.7530e-01, -4.2241e-01, -1.0000e+09, -3.1738e-01,
        -3.8420e-01, -1.0000e+09, -9.0318e-02, -3.8022e-01, -4.7442e-01,
        -2.9768e-01, -2.8347e-01, -1.0000e+09, -2.8498e-01, -4.5498e-01,
        -1.0000e+09, -1.5565e-01, -4.2691e-01, -1.0000e+09, -3.5217e-01,
        -3.4294e-01, -1.0000e+09, -1.1230e-01, -4.2169e-01, -1.0000e+09,
        -3.5217e-01, -4.8456e-01, -1.0000e+09, -2.8151e-01, -2.9321e-01,
        -1.0000e+09, -3.5217e-01, -4.8456e-01, -1.0000e+09, -2.8398e-01,
        -3.6857e-01, -1.0000e+09, -1.0184e-02, -3.7285e-01, -1.0000e+09,
        -1.0184e-02, -3.7285e-01, -1.0000e+09, -1.0184e-02, -1.0184e-02,
        -1.0184e-02, -1.0184e-02, -3.7285e-01, -1.0000e+09, -1.0184e-02,
        -3.7285e-01, -1.0000e+09, -1.0184e-02, -3.7285e-01, -1.0000e+09,
        -1.0184e-02, -3.7285e-01, -1.0000e+09, -1.0184e-02, -3.7285e-01,
        -1.0000e+09, -1.0184e-02

Tokens embeddings:
tensor([-0.0102, -0.2057, -0.3385, -0.2428, -0.1557, -0.1759, -0.2135, -0.2091,
        -0.1393, -0.3293, -0.0903, -0.2866, -0.2830, -0.2487,  0.0443, -0.2812,
         0.0443, -0.2763, -0.2513, -0.2789, -0.1616, -0.1580, -0.3282, -0.3581,
        -0.3533,  0.0155, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102,
        -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102,
        -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102,
        -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102,
        -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102,
        -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102,
        -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102,
        -0.0102, -0.0102, -0.0102, -0.0102, -0.0102])
arg max of tensor([-0.0102, -0.2057, -0.3385, -0.2428, -0.1557, -0.1759, -0.2135, -0.2091,
        -0.1393, -0.3293, -0.0903, -

Tokens embeddings:
tensor([-0.0102, -0.2057, -0.3385, -0.2428, -0.1557, -0.1759, -0.2135, -0.2091,
        -0.1393, -0.3293, -0.0903, -0.2866, -0.2830, -0.2487,  0.0443, -0.2812,
         0.0443, -0.2763, -0.2513, -0.2789, -0.1616, -0.1580, -0.3282, -0.3581,
        -0.3533,  0.0155, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102,
        -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102,
        -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102,
        -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102,
        -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102,
        -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102,
        -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102,
        -0.0102, -0.0102, -0.0102, -0.0102, -0.0102])
arg max of tensor([-0.0102, -0.2057, -0.3385, -0.2428, -0.1557, -0.1759, -0.2135, -0.2091,
        -0.1393, -0.3293, -0.0903, -

Tokens embeddings:
tensor([-0.0102, -0.2057, -0.3385, -0.2428, -0.1557, -0.1759, -0.2135, -0.2091,
        -0.1393, -0.3293, -0.0903, -0.2866, -0.2830, -0.2487,  0.0443, -0.2812,
         0.0443, -0.2763, -0.2513, -0.2789, -0.1616, -0.1580, -0.3282, -0.3581,
        -0.3533,  0.0155, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102,
        -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102,
        -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102,
        -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102,
        -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102,
        -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102,
        -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102,
        -0.0102, -0.0102, -0.0102, -0.0102, -0.0102])
arg max of tensor([-0.0102, -0.2057, -0.3385, -0.2428, -0.1557, -0.1759, -0.2135, -0.2091,
        -0.1393, -0.3293, -0.0903, -

Tokens embeddings:
tensor([-0.0102, -0.2057, -0.3385, -0.2428, -0.1557, -0.1759, -0.2135, -0.2091,
        -0.1393, -0.3293, -0.0903, -0.2866, -0.2830, -0.2487,  0.0443, -0.2812,
         0.0443, -0.2763, -0.2513, -0.2789, -0.1616, -0.1580, -0.3282, -0.3581,
        -0.3533,  0.0155, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102,
        -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102,
        -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102,
        -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102,
        -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102,
        -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102,
        -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102,
        -0.0102, -0.0102, -0.0102, -0.0102, -0.0102])
arg max of tensor([-0.0102, -0.2057, -0.3385, -0.2428, -0.1557, -0.1759, -0.2135, -0.2091,
        -0.1393, -0.3293, -0.0903, -

Tokens embeddings:
tensor([-0.0102, -0.2057, -0.3385, -0.2428, -0.1557, -0.1759, -0.2135, -0.2091,
        -0.1393, -0.3293, -0.0903, -0.2866, -0.2830, -0.2487,  0.0443, -0.2812,
         0.0443, -0.2763, -0.2513, -0.2789, -0.1616, -0.1580, -0.3282, -0.3581,
        -0.3533,  0.0155, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102,
        -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102,
        -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102,
        -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102,
        -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102,
        -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102,
        -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102,
        -0.0102, -0.0102, -0.0102, -0.0102, -0.0102])
arg max of tensor([-0.0102, -0.2057, -0.3385, -0.2428, -0.1557, -0.1759, -0.2135, -0.2091,
        -0.1393, -0.3293, -0.0903, -

Tokens embeddings:
tensor([-0.0102, -0.2057, -0.3385, -0.2428, -0.1557, -0.1759, -0.2135, -0.2091,
        -0.1393, -0.3293, -0.0903, -0.2866, -0.2830, -0.2487,  0.0443, -0.2812,
         0.0443, -0.2763, -0.2513, -0.2789, -0.1616, -0.1580, -0.3282, -0.3581,
        -0.3533,  0.0155, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102,
        -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102,
        -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102,
        -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102,
        -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102,
        -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102,
        -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102,
        -0.0102, -0.0102, -0.0102, -0.0102, -0.0102])
arg max of tensor([-0.0102, -0.2057, -0.3385, -0.2428, -0.1557, -0.1759, -0.2135, -0.2091,
        -0.1393, -0.3293, -0.0903, -

Tokens embeddings:
tensor([-0.0102, -0.2057, -0.3385, -0.2428, -0.1557, -0.1759, -0.2135, -0.2091,
        -0.1393, -0.3293, -0.0903, -0.2866, -0.2830, -0.2487,  0.0443, -0.2812,
         0.0443, -0.2763, -0.2513, -0.2789, -0.1616, -0.1580, -0.3282, -0.3581,
        -0.3533,  0.0155, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102,
        -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102,
        -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102,
        -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102,
        -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102,
        -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102,
        -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102,
        -0.0102, -0.0102, -0.0102, -0.0102, -0.0102])
arg max of tensor([-0.0102, -0.2057, -0.3385, -0.2428, -0.1557, -0.1759, -0.2135, -0.2091,
        -0.1393, -0.3293, -0.0903, -

Tokens embeddings:
tensor([-0.0102, -0.2057, -0.3385, -0.2428, -0.1557, -0.1759, -0.2135, -0.2091,
        -0.1393, -0.3293, -0.0903, -0.2866, -0.2830, -0.2487,  0.0443, -0.2812,
         0.0443, -0.2763, -0.2513, -0.2789, -0.1616, -0.1580, -0.3282, -0.3581,
        -0.3533,  0.0155, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102,
        -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102,
        -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102,
        -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102,
        -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102,
        -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102,
        -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102,
        -0.0102, -0.0102, -0.0102, -0.0102, -0.0102])
arg max of tensor([-0.0102, -0.2057, -0.3385, -0.2428, -0.1557, -0.1759, -0.2135, -0.2091,
        -0.1393, -0.3293, -0.0903, -

Tokens embeddings:
tensor([-0.0102, -0.2057, -0.3385, -0.2428, -0.1557, -0.1759, -0.2135, -0.2091,
        -0.1393, -0.3293, -0.0903, -0.2866, -0.2830, -0.2487,  0.0443, -0.2812,
         0.0443, -0.2763, -0.2513, -0.2789, -0.1616, -0.1580, -0.3282, -0.3581,
        -0.3533,  0.0155, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102,
        -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102,
        -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102,
        -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102,
        -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102,
        -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102,
        -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102,
        -0.0102, -0.0102, -0.0102, -0.0102, -0.0102])
arg max of tensor([-0.0102, -0.2057, -0.3385, -0.2428, -0.1557, -0.1759, -0.2135, -0.2091,
        -0.1393, -0.3293, -0.0903, -

Tokens embeddings:
tensor([-0.0102, -0.2057, -0.3385, -0.2428, -0.1557, -0.1759, -0.2135, -0.2091,
        -0.1393, -0.3293, -0.0903, -0.2866, -0.2830, -0.2487,  0.0443, -0.2812,
         0.0443, -0.2763, -0.2513, -0.2789, -0.1616, -0.1580, -0.3282, -0.3581,
        -0.3533,  0.0155, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102,
        -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102,
        -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102,
        -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102,
        -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102,
        -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102,
        -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102,
        -0.0102, -0.0102, -0.0102, -0.0102, -0.0102])
arg max of tensor([-0.0102, -0.2057, -0.3385, -0.2428, -0.1557, -0.1759, -0.2135, -0.2091,
        -0.1393, -0.3293, -0.0903, -

Tokens embeddings:
tensor([-0.0102, -0.2057, -0.3385, -0.2428, -0.1557, -0.1759, -0.2135, -0.2091,
        -0.1393, -0.3293, -0.0903, -0.2866, -0.2830, -0.2487,  0.0443, -0.2812,
         0.0443, -0.2763, -0.2513, -0.2789, -0.1616, -0.1580, -0.3282, -0.3581,
        -0.3533,  0.0155, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102,
        -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102,
        -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102,
        -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102,
        -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102,
        -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102,
        -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102,
        -0.0102, -0.0102, -0.0102, -0.0102, -0.0102])
arg max of tensor([-0.0102, -0.2057, -0.3385, -0.2428, -0.1557, -0.1759, -0.2135, -0.2091,
        -0.1393, -0.3293, -0.0903, -

Tokens embeddings:
tensor([-0.0102, -0.2057, -0.3385, -0.2428, -0.1557, -0.1759, -0.2135, -0.2091,
        -0.1393, -0.3293, -0.0903, -0.2866, -0.2830, -0.2487,  0.0443, -0.2812,
         0.0443, -0.2763, -0.2513, -0.2789, -0.1616, -0.1580, -0.3282, -0.3581,
        -0.3533,  0.0155, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102,
        -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102,
        -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102,
        -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102,
        -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102,
        -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102,
        -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102,
        -0.0102, -0.0102, -0.0102, -0.0102, -0.0102])
arg max of tensor([-0.0102, -0.2057, -0.3385, -0.2428, -0.1557, -0.1759, -0.2135, -0.2091,
        -0.1393, -0.3293, -0.0903, -

Tokens embeddings:
tensor([-1.0233e-02, -3.7905e-01,  4.4263e-02, -2.5066e-01, -1.0000e+09,
        -2.2223e-01, -3.8420e-01, -2.5157e-01, -1.4137e-01, -1.0000e+09,
        -3.4318e-02, -2.2223e-01, -2.8624e-01, -3.5217e-01, -6.6977e-02,
        -3.2243e-01, -4.1190e-01, -1.5565e-01, -2.3612e-01, -1.0000e+09,
        -1.5565e-01, -3.0582e-01, -8.2972e-02, -2.5776e-01, -1.0000e+09,
        -2.8151e-01, -3.5217e-01, -2.3895e-01, -1.6344e-01, -1.3014e-01,
        -9.0318e-02, -3.8686e-01, -2.4451e-01, -2.9343e-01, -1.0000e+09,
        -2.0984e-01, -3.9200e-01,  1.5462e-02, -1.0184e-02, -1.0000e+09,
        -1.0184e-02, -3.7285e-01, -1.0184e-02, -1.0184e-02, -1.0000e+09,
        -1.0184e-02, -3.7285e-01, -1.0184e-02, -1.0184e-02, -1.0000e+09,
        -1.0184e-02, -3.7285e-01, -1.0184e-02, -1.0184e-02, -1.0000e+09,
        -1.0184e-02, -3.7285e-01, -1.0184e-02, -1.0184e-02, -1.0000e+09,
        -1.0184e-02, -3.7285e-01, -1.0184e-02, -1.0184e-02, -1.0000e+09,
        -1.0184e-02, -3.7285e-01

Tokens embeddings:
tensor([-1.0233e-02, -3.7905e-01,  4.4263e-02, -2.5066e-01, -1.0000e+09,
        -2.2223e-01, -3.8420e-01, -2.5157e-01, -1.4137e-01, -1.0000e+09,
        -3.4318e-02, -2.2223e-01, -2.8624e-01, -3.5217e-01, -6.6977e-02,
        -3.2243e-01, -4.1190e-01, -1.5565e-01, -2.3612e-01, -1.0000e+09,
        -1.5565e-01, -3.0582e-01, -8.2972e-02, -2.5776e-01, -1.0000e+09,
        -2.8151e-01, -3.5217e-01, -2.3895e-01, -1.6344e-01, -1.3014e-01,
        -9.0318e-02, -3.8686e-01, -2.4451e-01, -2.9343e-01, -1.0000e+09,
        -2.0984e-01, -3.9200e-01,  1.5462e-02, -1.0184e-02, -1.0000e+09,
        -1.0184e-02, -3.7285e-01, -1.0184e-02, -1.0184e-02, -1.0000e+09,
        -1.0184e-02, -3.7285e-01, -1.0184e-02, -1.0184e-02, -1.0000e+09,
        -1.0184e-02, -3.7285e-01, -1.0184e-02, -1.0184e-02, -1.0000e+09,
        -1.0184e-02, -3.7285e-01, -1.0184e-02, -1.0184e-02, -1.0000e+09,
        -1.0184e-02, -3.7285e-01, -1.0184e-02, -1.0184e-02, -1.0000e+09,
        -1.0184e-02, -3.7285e-01

Tokens embeddings:
tensor([-1.0233e-02, -3.7905e-01,  4.4263e-02, -2.5066e-01, -1.0000e+09,
        -2.2223e-01, -3.8420e-01, -2.5157e-01, -1.4137e-01, -1.0000e+09,
        -3.4318e-02, -2.2223e-01, -2.8624e-01, -3.5217e-01, -6.6977e-02,
        -3.2243e-01, -4.1190e-01, -1.5565e-01, -2.3612e-01, -1.0000e+09,
        -1.5565e-01, -3.0582e-01, -8.2972e-02, -2.5776e-01, -1.0000e+09,
        -2.8151e-01, -3.5217e-01, -2.3895e-01, -1.6344e-01, -1.3014e-01,
        -9.0318e-02, -3.8686e-01, -2.4451e-01, -2.9343e-01, -1.0000e+09,
        -2.0984e-01, -3.9200e-01,  1.5462e-02, -1.0184e-02, -1.0000e+09,
        -1.0184e-02, -3.7285e-01, -1.0184e-02, -1.0184e-02, -1.0000e+09,
        -1.0184e-02, -3.7285e-01, -1.0184e-02, -1.0184e-02, -1.0000e+09,
        -1.0184e-02, -3.7285e-01, -1.0184e-02, -1.0184e-02, -1.0000e+09,
        -1.0184e-02, -3.7285e-01, -1.0184e-02, -1.0184e-02, -1.0000e+09,
        -1.0184e-02, -3.7285e-01, -1.0184e-02, -1.0184e-02, -1.0000e+09,
        -1.0184e-02, -3.7285e-01

Tokens embeddings:
tensor([-1.0233e-02, -3.7905e-01,  4.4263e-02, -2.5066e-01, -1.0000e+09,
        -2.2223e-01, -3.8420e-01, -2.5157e-01, -1.4137e-01, -1.0000e+09,
        -3.4318e-02, -2.2223e-01, -2.8624e-01, -3.5217e-01, -6.6977e-02,
        -3.2243e-01, -4.1190e-01, -1.5565e-01, -2.3612e-01, -1.0000e+09,
        -1.5565e-01, -3.0582e-01, -8.2972e-02, -2.5776e-01, -1.0000e+09,
        -2.8151e-01, -3.5217e-01, -2.3895e-01, -1.6344e-01, -1.3014e-01,
        -9.0318e-02, -3.8686e-01, -2.4451e-01, -2.9343e-01, -1.0000e+09,
        -2.0984e-01, -3.9200e-01,  1.5462e-02, -1.0184e-02, -1.0000e+09,
        -1.0184e-02, -3.7285e-01, -1.0184e-02, -1.0184e-02, -1.0000e+09,
        -1.0184e-02, -3.7285e-01, -1.0184e-02, -1.0184e-02, -1.0000e+09,
        -1.0184e-02, -3.7285e-01, -1.0184e-02, -1.0184e-02, -1.0000e+09,
        -1.0184e-02, -3.7285e-01, -1.0184e-02, -1.0184e-02, -1.0000e+09,
        -1.0184e-02, -3.7285e-01, -1.0184e-02, -1.0184e-02, -1.0000e+09,
        -1.0184e-02, -3.7285e-01

Tokens embeddings:
tensor([-1.0233e-02, -3.7905e-01,  4.4263e-02, -2.5066e-01, -1.0000e+09,
        -2.2223e-01, -3.8420e-01, -2.5157e-01, -1.4137e-01, -1.0000e+09,
        -3.4318e-02, -2.2223e-01, -2.8624e-01, -3.5217e-01, -6.6977e-02,
        -3.2243e-01, -4.1190e-01, -1.5565e-01, -2.3612e-01, -1.0000e+09,
        -1.5565e-01, -3.0582e-01, -8.2972e-02, -2.5776e-01, -1.0000e+09,
        -2.8151e-01, -3.5217e-01, -2.3895e-01, -1.6344e-01, -1.3014e-01,
        -9.0318e-02, -3.8686e-01, -2.4451e-01, -2.9343e-01, -1.0000e+09,
        -2.0984e-01, -3.9200e-01,  1.5462e-02, -1.0184e-02, -1.0000e+09,
        -1.0184e-02, -3.7285e-01, -1.0184e-02, -1.0184e-02, -1.0000e+09,
        -1.0184e-02, -3.7285e-01, -1.0184e-02, -1.0184e-02, -1.0000e+09,
        -1.0184e-02, -3.7285e-01, -1.0184e-02, -1.0184e-02, -1.0000e+09,
        -1.0184e-02, -3.7285e-01, -1.0184e-02, -1.0184e-02, -1.0000e+09,
        -1.0184e-02, -3.7285e-01, -1.0184e-02, -1.0184e-02, -1.0000e+09,
        -1.0184e-02, -3.7285e-01

Tokens embeddings:
tensor([-1.0233e-02, -3.7905e-01,  4.4263e-02, -2.5066e-01, -1.0000e+09,
        -2.2223e-01, -3.8420e-01, -2.5157e-01, -1.4137e-01, -1.0000e+09,
        -3.4318e-02, -2.2223e-01, -2.8624e-01, -3.5217e-01, -6.6977e-02,
        -3.2243e-01, -4.1190e-01, -1.5565e-01, -2.3612e-01, -1.0000e+09,
        -1.5565e-01, -3.0582e-01, -8.2972e-02, -2.5776e-01, -1.0000e+09,
        -2.8151e-01, -3.5217e-01, -2.3895e-01, -1.6344e-01, -1.3014e-01,
        -9.0318e-02, -3.8686e-01, -2.4451e-01, -2.9343e-01, -1.0000e+09,
        -2.0984e-01, -3.9200e-01,  1.5462e-02, -1.0184e-02, -1.0000e+09,
        -1.0184e-02, -3.7285e-01, -1.0184e-02, -1.0184e-02, -1.0000e+09,
        -1.0184e-02, -3.7285e-01, -1.0184e-02, -1.0184e-02, -1.0000e+09,
        -1.0184e-02, -3.7285e-01, -1.0184e-02, -1.0184e-02, -1.0000e+09,
        -1.0184e-02, -3.7285e-01, -1.0184e-02, -1.0184e-02, -1.0000e+09,
        -1.0184e-02, -3.7285e-01, -1.0184e-02, -1.0184e-02, -1.0000e+09,
        -1.0184e-02, -3.7285e-01

Tokens embeddings:
tensor([-1.0233e-02, -3.7905e-01,  4.4263e-02, -2.5066e-01, -1.0000e+09,
        -2.2223e-01, -3.8420e-01, -2.5157e-01, -1.4137e-01, -1.0000e+09,
        -3.4318e-02, -2.2223e-01, -2.8624e-01, -3.5217e-01, -6.6977e-02,
        -3.2243e-01, -4.1190e-01, -1.5565e-01, -2.3612e-01, -1.0000e+09,
        -1.5565e-01, -3.0582e-01, -8.2972e-02, -2.5776e-01, -1.0000e+09,
        -2.8151e-01, -3.5217e-01, -2.3895e-01, -1.6344e-01, -1.3014e-01,
        -9.0318e-02, -3.8686e-01, -2.4451e-01, -2.9343e-01, -1.0000e+09,
        -2.0984e-01, -3.9200e-01,  1.5462e-02, -1.0184e-02, -1.0000e+09,
        -1.0184e-02, -3.7285e-01, -1.0184e-02, -1.0184e-02, -1.0000e+09,
        -1.0184e-02, -3.7285e-01, -1.0184e-02, -1.0184e-02, -1.0000e+09,
        -1.0184e-02, -3.7285e-01, -1.0184e-02, -1.0184e-02, -1.0000e+09,
        -1.0184e-02, -3.7285e-01, -1.0184e-02, -1.0184e-02, -1.0000e+09,
        -1.0184e-02, -3.7285e-01, -1.0184e-02, -1.0184e-02, -1.0000e+09,
        -1.0184e-02, -3.7285e-01

Tokens embeddings:
tensor([-1.0233e-02, -3.7905e-01,  4.4263e-02, -2.5066e-01, -1.0000e+09,
        -2.2223e-01, -3.8420e-01, -2.5157e-01, -1.4137e-01, -1.0000e+09,
        -3.4318e-02, -2.2223e-01, -2.8624e-01, -3.5217e-01, -6.6977e-02,
        -3.2243e-01, -4.1190e-01, -1.5565e-01, -2.3612e-01, -1.0000e+09,
        -1.5565e-01, -3.0582e-01, -8.2972e-02, -2.5776e-01, -1.0000e+09,
        -2.8151e-01, -3.5217e-01, -2.3895e-01, -1.6344e-01, -1.3014e-01,
        -9.0318e-02, -3.8686e-01, -2.4451e-01, -2.9343e-01, -1.0000e+09,
        -2.0984e-01, -3.9200e-01,  1.5462e-02, -1.0184e-02, -1.0000e+09,
        -1.0184e-02, -3.7285e-01, -1.0184e-02, -1.0184e-02, -1.0000e+09,
        -1.0184e-02, -3.7285e-01, -1.0184e-02, -1.0184e-02, -1.0000e+09,
        -1.0184e-02, -3.7285e-01, -1.0184e-02, -1.0184e-02, -1.0000e+09,
        -1.0184e-02, -3.7285e-01, -1.0184e-02, -1.0184e-02, -1.0000e+09,
        -1.0184e-02, -3.7285e-01, -1.0184e-02, -1.0184e-02, -1.0000e+09,
        -1.0184e-02, -3.7285e-01

Tokens embeddings:
tensor([-1.0233e-02, -3.7905e-01,  4.4263e-02, -2.5066e-01, -1.0000e+09,
        -2.2223e-01, -3.8420e-01, -2.5157e-01, -1.4137e-01, -1.0000e+09,
        -3.4318e-02, -2.2223e-01, -2.8624e-01, -3.5217e-01, -6.6977e-02,
        -3.2243e-01, -4.1190e-01, -1.5565e-01, -2.3612e-01, -1.0000e+09,
        -1.5565e-01, -3.0582e-01, -8.2972e-02, -2.5776e-01, -1.0000e+09,
        -2.8151e-01, -3.5217e-01, -2.3895e-01, -1.6344e-01, -1.3014e-01,
        -9.0318e-02, -3.8686e-01, -2.4451e-01, -2.9343e-01, -1.0000e+09,
        -2.0984e-01, -3.9200e-01,  1.5462e-02, -1.0184e-02, -1.0000e+09,
        -1.0184e-02, -3.7285e-01, -1.0184e-02, -1.0184e-02, -1.0000e+09,
        -1.0184e-02, -3.7285e-01, -1.0184e-02, -1.0184e-02, -1.0000e+09,
        -1.0184e-02, -3.7285e-01, -1.0184e-02, -1.0184e-02, -1.0000e+09,
        -1.0184e-02, -3.7285e-01, -1.0184e-02, -1.0184e-02, -1.0000e+09,
        -1.0184e-02, -3.7285e-01, -1.0184e-02, -1.0184e-02, -1.0000e+09,
        -1.0184e-02, -3.7285e-01

Tokens embeddings:
tensor([-1.0233e-02, -3.7905e-01,  4.4263e-02, -2.5066e-01, -1.0000e+09,
        -2.2223e-01, -3.8420e-01, -2.5157e-01, -1.4137e-01, -1.0000e+09,
        -3.4318e-02, -2.2223e-01, -2.8624e-01, -3.5217e-01, -6.6977e-02,
        -3.2243e-01, -4.1190e-01, -1.5565e-01, -2.3612e-01, -1.0000e+09,
        -1.5565e-01, -3.0582e-01, -8.2972e-02, -2.5776e-01, -1.0000e+09,
        -2.8151e-01, -3.5217e-01, -2.3895e-01, -1.6344e-01, -1.3014e-01,
        -9.0318e-02, -3.8686e-01, -2.4451e-01, -2.9343e-01, -1.0000e+09,
        -2.0984e-01, -3.9200e-01,  1.5462e-02, -1.0184e-02, -1.0000e+09,
        -1.0184e-02, -3.7285e-01, -1.0184e-02, -1.0184e-02, -1.0000e+09,
        -1.0184e-02, -3.7285e-01, -1.0184e-02, -1.0184e-02, -1.0000e+09,
        -1.0184e-02, -3.7285e-01, -1.0184e-02, -1.0184e-02, -1.0000e+09,
        -1.0184e-02, -3.7285e-01, -1.0184e-02, -1.0184e-02, -1.0000e+09,
        -1.0184e-02, -3.7285e-01, -1.0184e-02, -1.0184e-02, -1.0000e+09,
        -1.0184e-02, -3.7285e-01

Tokens embeddings:
tensor([-1.0233e-02, -3.7905e-01,  4.4263e-02, -2.5066e-01, -1.0000e+09,
        -2.2223e-01, -3.8420e-01, -2.5157e-01, -1.4137e-01, -1.0000e+09,
        -3.4318e-02, -2.2223e-01, -2.8624e-01, -3.5217e-01, -6.6977e-02,
        -3.2243e-01, -4.1190e-01, -1.5565e-01, -2.3612e-01, -1.0000e+09,
        -1.5565e-01, -3.0582e-01, -8.2972e-02, -2.5776e-01, -1.0000e+09,
        -2.8151e-01, -3.5217e-01, -2.3895e-01, -1.6344e-01, -1.3014e-01,
        -9.0318e-02, -3.8686e-01, -2.4451e-01, -2.9343e-01, -1.0000e+09,
        -2.0984e-01, -3.9200e-01,  1.5462e-02, -1.0184e-02, -1.0000e+09,
        -1.0184e-02, -3.7285e-01, -1.0184e-02, -1.0184e-02, -1.0000e+09,
        -1.0184e-02, -3.7285e-01, -1.0184e-02, -1.0184e-02, -1.0000e+09,
        -1.0184e-02, -3.7285e-01, -1.0184e-02, -1.0184e-02, -1.0000e+09,
        -1.0184e-02, -3.7285e-01, -1.0184e-02, -1.0184e-02, -1.0000e+09,
        -1.0184e-02, -3.7285e-01, -1.0184e-02, -1.0184e-02, -1.0000e+09,
        -1.0184e-02, -3.7285e-01

Tokens embeddings:
tensor([-1.0233e-02, -3.7905e-01,  4.4263e-02, -2.5066e-01, -1.0000e+09,
        -2.2223e-01, -3.8420e-01, -2.5157e-01, -1.4137e-01, -1.0000e+09,
        -3.4318e-02, -2.2223e-01, -2.8624e-01, -3.5217e-01, -6.6977e-02,
        -3.2243e-01, -4.1190e-01, -1.5565e-01, -2.3612e-01, -1.0000e+09,
        -1.5565e-01, -3.0582e-01, -8.2972e-02, -2.5776e-01, -1.0000e+09,
        -2.8151e-01, -3.5217e-01, -2.3895e-01, -1.6344e-01, -1.3014e-01,
        -9.0318e-02, -3.8686e-01, -2.4451e-01, -2.9343e-01, -1.0000e+09,
        -2.0984e-01, -3.9200e-01,  1.5462e-02, -1.0184e-02, -1.0000e+09,
        -1.0184e-02, -3.7285e-01, -1.0184e-02, -1.0184e-02, -1.0000e+09,
        -1.0184e-02, -3.7285e-01, -1.0184e-02, -1.0184e-02, -1.0000e+09,
        -1.0184e-02, -3.7285e-01, -1.0184e-02, -1.0184e-02, -1.0000e+09,
        -1.0184e-02, -3.7285e-01, -1.0184e-02, -1.0184e-02, -1.0000e+09,
        -1.0184e-02, -3.7285e-01, -1.0184e-02, -1.0184e-02, -1.0000e+09,
        -1.0184e-02, -3.7285e-01

Tokens embeddings:
tensor([-0.0102, -0.3661, -0.1932, -0.3393, -0.3522, -0.2881, -0.0600, -0.3476,
        -0.2815, -0.2830, -0.2493, -0.2305, -0.2463, -0.1659, -0.2584, -0.1516,
        -0.1325, -0.1591, -0.3702, -0.3522, -0.3415, -0.3139, -0.2915, -0.2737,
        -0.2815, -0.2344, -0.2443,  0.0155, -0.0102, -0.0102, -0.0102, -0.0102,
        -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102,
        -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102,
        -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102,
        -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102,
        -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102,
        -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102,
        -0.0102, -0.0102, -0.0102, -0.0102, -0.0102])
arg max of tensor([-0.0102, -0.3661, -0.1932, -0.3393, -0.3522, -0.2881, -0.0600, -0.3476,
        -0.2815, -0.2830, -0.2493, -

Tokens embeddings:
tensor([-0.0102, -0.3661, -0.1932, -0.3393, -0.3522, -0.2881, -0.0600, -0.3476,
        -0.2815, -0.2830, -0.2493, -0.2305, -0.2463, -0.1659, -0.2584, -0.1516,
        -0.1325, -0.1591, -0.3702, -0.3522, -0.3415, -0.3139, -0.2915, -0.2737,
        -0.2815, -0.2344, -0.2443,  0.0155, -0.0102, -0.0102, -0.0102, -0.0102,
        -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102,
        -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102,
        -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102,
        -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102,
        -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102,
        -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102,
        -0.0102, -0.0102, -0.0102, -0.0102, -0.0102])
arg max of tensor([-0.0102, -0.3661, -0.1932, -0.3393, -0.3522, -0.2881, -0.0600, -0.3476,
        -0.2815, -0.2830, -0.2493, -

Tokens embeddings:
tensor([-0.0102, -0.3661, -0.1932, -0.3393, -0.3522, -0.2881, -0.0600, -0.3476,
        -0.2815, -0.2830, -0.2493, -0.2305, -0.2463, -0.1659, -0.2584, -0.1516,
        -0.1325, -0.1591, -0.3702, -0.3522, -0.3415, -0.3139, -0.2915, -0.2737,
        -0.2815, -0.2344, -0.2443,  0.0155, -0.0102, -0.0102, -0.0102, -0.0102,
        -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102,
        -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102,
        -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102,
        -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102,
        -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102,
        -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102,
        -0.0102, -0.0102, -0.0102, -0.0102, -0.0102])
arg max of tensor([-0.0102, -0.3661, -0.1932, -0.3393, -0.3522, -0.2881, -0.0600, -0.3476,
        -0.2815, -0.2830, -0.2493, -

Tokens embeddings:
tensor([-0.0102, -0.3661, -0.1932, -0.3393, -0.3522, -0.2881, -0.0600, -0.3476,
        -0.2815, -0.2830, -0.2493, -0.2305, -0.2463, -0.1659, -0.2584, -0.1516,
        -0.1325, -0.1591, -0.3702, -0.3522, -0.3415, -0.3139, -0.2915, -0.2737,
        -0.2815, -0.2344, -0.2443,  0.0155, -0.0102, -0.0102, -0.0102, -0.0102,
        -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102,
        -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102,
        -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102,
        -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102,
        -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102,
        -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102,
        -0.0102, -0.0102, -0.0102, -0.0102, -0.0102])
arg max of tensor([-0.0102, -0.3661, -0.1932, -0.3393, -0.3522, -0.2881, -0.0600, -0.3476,
        -0.2815, -0.2830, -0.2493, -

Tokens embeddings:
tensor([-0.0102, -0.3661, -0.1932, -0.3393, -0.3522, -0.2881, -0.0600, -0.3476,
        -0.2815, -0.2830, -0.2493, -0.2305, -0.2463, -0.1659, -0.2584, -0.1516,
        -0.1325, -0.1591, -0.3702, -0.3522, -0.3415, -0.3139, -0.2915, -0.2737,
        -0.2815, -0.2344, -0.2443,  0.0155, -0.0102, -0.0102, -0.0102, -0.0102,
        -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102,
        -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102,
        -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102,
        -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102,
        -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102,
        -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102,
        -0.0102, -0.0102, -0.0102, -0.0102, -0.0102])
arg max of tensor([-0.0102, -0.3661, -0.1932, -0.3393, -0.3522, -0.2881, -0.0600, -0.3476,
        -0.2815, -0.2830, -0.2493, -

Tokens embeddings:
tensor([-0.0102, -0.3661, -0.1932, -0.3393, -0.3522, -0.2881, -0.0600, -0.3476,
        -0.2815, -0.2830, -0.2493, -0.2305, -0.2463, -0.1659, -0.2584, -0.1516,
        -0.1325, -0.1591, -0.3702, -0.3522, -0.3415, -0.3139, -0.2915, -0.2737,
        -0.2815, -0.2344, -0.2443,  0.0155, -0.0102, -0.0102, -0.0102, -0.0102,
        -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102,
        -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102,
        -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102,
        -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102,
        -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102,
        -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102,
        -0.0102, -0.0102, -0.0102, -0.0102, -0.0102])
arg max of tensor([-0.0102, -0.3661, -0.1932, -0.3393, -0.3522, -0.2881, -0.0600, -0.3476,
        -0.2815, -0.2830, -0.2493, -

Tokens embeddings:
tensor([-0.0102, -0.3661, -0.1932, -0.3393, -0.3522, -0.2881, -0.0600, -0.3476,
        -0.2815, -0.2830, -0.2493, -0.2305, -0.2463, -0.1659, -0.2584, -0.1516,
        -0.1325, -0.1591, -0.3702, -0.3522, -0.3415, -0.3139, -0.2915, -0.2737,
        -0.2815, -0.2344, -0.2443,  0.0155, -0.0102, -0.0102, -0.0102, -0.0102,
        -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102,
        -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102,
        -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102,
        -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102,
        -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102,
        -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102,
        -0.0102, -0.0102, -0.0102, -0.0102, -0.0102])
arg max of tensor([-0.0102, -0.3661, -0.1932, -0.3393, -0.3522, -0.2881, -0.0600, -0.3476,
        -0.2815, -0.2830, -0.2493, -

Tokens embeddings:
tensor([-0.0102, -0.3661, -0.1932, -0.3393, -0.3522, -0.2881, -0.0600, -0.3476,
        -0.2815, -0.2830, -0.2493, -0.2305, -0.2463, -0.1659, -0.2584, -0.1516,
        -0.1325, -0.1591, -0.3702, -0.3522, -0.3415, -0.3139, -0.2915, -0.2737,
        -0.2815, -0.2344, -0.2443,  0.0155, -0.0102, -0.0102, -0.0102, -0.0102,
        -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102,
        -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102,
        -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102,
        -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102,
        -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102,
        -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102,
        -0.0102, -0.0102, -0.0102, -0.0102, -0.0102])
arg max of tensor([-0.0102, -0.3661, -0.1932, -0.3393, -0.3522, -0.2881, -0.0600, -0.3476,
        -0.2815, -0.2830, -0.2493, -

Tokens embeddings:
tensor([-0.0102, -0.3661, -0.1932, -0.3393, -0.3522, -0.2881, -0.0600, -0.3476,
        -0.2815, -0.2830, -0.2493, -0.2305, -0.2463, -0.1659, -0.2584, -0.1516,
        -0.1325, -0.1591, -0.3702, -0.3522, -0.3415, -0.3139, -0.2915, -0.2737,
        -0.2815, -0.2344, -0.2443,  0.0155, -0.0102, -0.0102, -0.0102, -0.0102,
        -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102,
        -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102,
        -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102,
        -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102,
        -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102,
        -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102,
        -0.0102, -0.0102, -0.0102, -0.0102, -0.0102])
arg max of tensor([-0.0102, -0.3661, -0.1932, -0.3393, -0.3522, -0.2881, -0.0600, -0.3476,
        -0.2815, -0.2830, -0.2493, -

Tokens embeddings:
tensor([-0.0102, -0.3661, -0.1932, -0.3393, -0.3522, -0.2881, -0.0600, -0.3476,
        -0.2815, -0.2830, -0.2493, -0.2305, -0.2463, -0.1659, -0.2584, -0.1516,
        -0.1325, -0.1591, -0.3702, -0.3522, -0.3415, -0.3139, -0.2915, -0.2737,
        -0.2815, -0.2344, -0.2443,  0.0155, -0.0102, -0.0102, -0.0102, -0.0102,
        -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102,
        -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102,
        -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102,
        -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102,
        -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102,
        -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102,
        -0.0102, -0.0102, -0.0102, -0.0102, -0.0102])
arg max of tensor([-0.0102, -0.3661, -0.1932, -0.3393, -0.3522, -0.2881, -0.0600, -0.3476,
        -0.2815, -0.2830, -0.2493, -

Tokens embeddings:
tensor([-0.0102, -0.3661, -0.1932, -0.3393, -0.3522, -0.2881, -0.0600, -0.3476,
        -0.2815, -0.2830, -0.2493, -0.2305, -0.2463, -0.1659, -0.2584, -0.1516,
        -0.1325, -0.1591, -0.3702, -0.3522, -0.3415, -0.3139, -0.2915, -0.2737,
        -0.2815, -0.2344, -0.2443,  0.0155, -0.0102, -0.0102, -0.0102, -0.0102,
        -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102,
        -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102,
        -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102,
        -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102,
        -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102,
        -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102,
        -0.0102, -0.0102, -0.0102, -0.0102, -0.0102])
arg max of tensor([-0.0102, -0.3661, -0.1932, -0.3393, -0.3522, -0.2881, -0.0600, -0.3476,
        -0.2815, -0.2830, -0.2493, -

Tokens embeddings:
tensor([-0.0102, -0.3661, -0.1932, -0.3393, -0.3522, -0.2881, -0.0600, -0.3476,
        -0.2815, -0.2830, -0.2493, -0.2305, -0.2463, -0.1659, -0.2584, -0.1516,
        -0.1325, -0.1591, -0.3702, -0.3522, -0.3415, -0.3139, -0.2915, -0.2737,
        -0.2815, -0.2344, -0.2443,  0.0155, -0.0102, -0.0102, -0.0102, -0.0102,
        -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102,
        -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102,
        -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102,
        -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102,
        -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102,
        -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102,
        -0.0102, -0.0102, -0.0102, -0.0102, -0.0102])
arg max of tensor([-0.0102, -0.3661, -0.1932, -0.3393, -0.3522, -0.2881, -0.0600, -0.3476,
        -0.2815, -0.2830, -0.2493, -

Tokens embeddings:
tensor([-1.0233e-02, -4.3200e-01, -1.8753e-01, -2.6926e-01, -1.0000e+09,
        -3.3221e-01, -4.3649e-01, -3.3942e-01, -1.4316e-01, -1.0000e+09,
        -9.0318e-02, -4.7442e-01, -4.3643e-02, -2.7828e-01, -1.0000e+09,
        -2.4218e-01, -3.8265e-01, -1.6655e-01, -1.7840e-01, -1.0000e+09,
        -2.8299e-01, -2.5045e-01,  1.5462e-02, -1.0184e-02, -1.0184e-02,
        -1.0184e-02, -3.7285e-01, -1.0184e-02, -1.0184e-02, -1.0000e+09,
        -1.0184e-02, -3.7285e-01, -1.0184e-02, -1.0184e-02, -1.0000e+09,
        -1.0184e-02, -1.0184e-02, -1.0184e-02, -1.0184e-02, -1.0000e+09,
        -1.0184e-02, -3.7285e-01, -1.0184e-02, -1.0184e-02, -1.0000e+09,
        -1.0184e-02, -3.7285e-01, -1.0184e-02, -1.0184e-02, -1.0000e+09,
        -1.0184e-02, -3.7285e-01, -1.0184e-02, -1.0184e-02, -1.0000e+09,
        -1.0184e-02, -3.7285e-01, -1.0184e-02, -1.0184e-02, -1.0000e+09,
        -1.0184e-02, -3.7285e-01, -1.0184e-02, -1.0184e-02, -1.0000e+09,
        -1.0184e-02, -3.7285e-01

Tokens embeddings:
tensor([-1.0233e-02, -4.3200e-01, -1.8753e-01, -2.6926e-01, -1.0000e+09,
        -3.3221e-01, -4.3649e-01, -3.3942e-01, -1.4316e-01, -1.0000e+09,
        -9.0318e-02, -4.7442e-01, -4.3643e-02, -2.7828e-01, -1.0000e+09,
        -2.4218e-01, -3.8265e-01, -1.6655e-01, -1.7840e-01, -1.0000e+09,
        -2.8299e-01, -2.5045e-01,  1.5462e-02, -1.0184e-02, -1.0184e-02,
        -1.0184e-02, -3.7285e-01, -1.0184e-02, -1.0184e-02, -1.0000e+09,
        -1.0184e-02, -3.7285e-01, -1.0184e-02, -1.0184e-02, -1.0000e+09,
        -1.0184e-02, -1.0184e-02, -1.0184e-02, -1.0184e-02, -1.0000e+09,
        -1.0184e-02, -3.7285e-01, -1.0184e-02, -1.0184e-02, -1.0000e+09,
        -1.0184e-02, -3.7285e-01, -1.0184e-02, -1.0184e-02, -1.0000e+09,
        -1.0184e-02, -3.7285e-01, -1.0184e-02, -1.0184e-02, -1.0000e+09,
        -1.0184e-02, -3.7285e-01, -1.0184e-02, -1.0184e-02, -1.0000e+09,
        -1.0184e-02, -3.7285e-01, -1.0184e-02, -1.0184e-02, -1.0000e+09,
        -1.0184e-02, -3.7285e-01

Tokens embeddings:
tensor([-1.0233e-02, -4.3200e-01, -1.8753e-01, -2.6926e-01, -1.0000e+09,
        -3.3221e-01, -4.3649e-01, -3.3942e-01, -1.4316e-01, -1.0000e+09,
        -9.0318e-02, -4.7442e-01, -4.3643e-02, -2.7828e-01, -1.0000e+09,
        -2.4218e-01, -3.8265e-01, -1.6655e-01, -1.7840e-01, -1.0000e+09,
        -2.8299e-01, -2.5045e-01,  1.5462e-02, -1.0184e-02, -1.0184e-02,
        -1.0184e-02, -3.7285e-01, -1.0184e-02, -1.0184e-02, -1.0000e+09,
        -1.0184e-02, -3.7285e-01, -1.0184e-02, -1.0184e-02, -1.0000e+09,
        -1.0184e-02, -1.0184e-02, -1.0184e-02, -1.0184e-02, -1.0000e+09,
        -1.0184e-02, -3.7285e-01, -1.0184e-02, -1.0184e-02, -1.0000e+09,
        -1.0184e-02, -3.7285e-01, -1.0184e-02, -1.0184e-02, -1.0000e+09,
        -1.0184e-02, -3.7285e-01, -1.0184e-02, -1.0184e-02, -1.0000e+09,
        -1.0184e-02, -3.7285e-01, -1.0184e-02, -1.0184e-02, -1.0000e+09,
        -1.0184e-02, -3.7285e-01, -1.0184e-02, -1.0184e-02, -1.0000e+09,
        -1.0184e-02, -3.7285e-01

Tokens embeddings:
tensor([-1.0233e-02, -4.3200e-01, -1.8753e-01, -2.6926e-01, -1.0000e+09,
        -3.3221e-01, -4.3649e-01, -3.3942e-01, -1.4316e-01, -1.0000e+09,
        -9.0318e-02, -4.7442e-01, -4.3643e-02, -2.7828e-01, -1.0000e+09,
        -2.4218e-01, -3.8265e-01, -1.6655e-01, -1.7840e-01, -1.0000e+09,
        -2.8299e-01, -2.5045e-01,  1.5462e-02, -1.0184e-02, -1.0184e-02,
        -1.0184e-02, -3.7285e-01, -1.0184e-02, -1.0184e-02, -1.0000e+09,
        -1.0184e-02, -3.7285e-01, -1.0184e-02, -1.0184e-02, -1.0000e+09,
        -1.0184e-02, -1.0184e-02, -1.0184e-02, -1.0184e-02, -1.0000e+09,
        -1.0184e-02, -3.7285e-01, -1.0184e-02, -1.0184e-02, -1.0000e+09,
        -1.0184e-02, -3.7285e-01, -1.0184e-02, -1.0184e-02, -1.0000e+09,
        -1.0184e-02, -3.7285e-01, -1.0184e-02, -1.0184e-02, -1.0000e+09,
        -1.0184e-02, -3.7285e-01, -1.0184e-02, -1.0184e-02, -1.0000e+09,
        -1.0184e-02, -3.7285e-01, -1.0184e-02, -1.0184e-02, -1.0000e+09,
        -1.0184e-02, -3.7285e-01

Tokens embeddings:
tensor([-1.0233e-02, -4.3200e-01, -1.8753e-01, -2.6926e-01, -1.0000e+09,
        -3.3221e-01, -4.3649e-01, -3.3942e-01, -1.4316e-01, -1.0000e+09,
        -9.0318e-02, -4.7442e-01, -4.3643e-02, -2.7828e-01, -1.0000e+09,
        -2.4218e-01, -3.8265e-01, -1.6655e-01, -1.7840e-01, -1.0000e+09,
        -2.8299e-01, -2.5045e-01,  1.5462e-02, -1.0184e-02, -1.0184e-02,
        -1.0184e-02, -3.7285e-01, -1.0184e-02, -1.0184e-02, -1.0000e+09,
        -1.0184e-02, -3.7285e-01, -1.0184e-02, -1.0184e-02, -1.0000e+09,
        -1.0184e-02, -1.0184e-02, -1.0184e-02, -1.0184e-02, -1.0000e+09,
        -1.0184e-02, -3.7285e-01, -1.0184e-02, -1.0184e-02, -1.0000e+09,
        -1.0184e-02, -3.7285e-01, -1.0184e-02, -1.0184e-02, -1.0000e+09,
        -1.0184e-02, -3.7285e-01, -1.0184e-02, -1.0184e-02, -1.0000e+09,
        -1.0184e-02, -3.7285e-01, -1.0184e-02, -1.0184e-02, -1.0000e+09,
        -1.0184e-02, -3.7285e-01, -1.0184e-02, -1.0184e-02, -1.0000e+09,
        -1.0184e-02, -3.7285e-01

Tokens embeddings:
tensor([-1.0233e-02, -4.3200e-01, -1.8753e-01, -2.6926e-01, -1.0000e+09,
        -3.3221e-01, -4.3649e-01, -3.3942e-01, -1.4316e-01, -1.0000e+09,
        -9.0318e-02, -4.7442e-01, -4.3643e-02, -2.7828e-01, -1.0000e+09,
        -2.4218e-01, -3.8265e-01, -1.6655e-01, -1.7840e-01, -1.0000e+09,
        -2.8299e-01, -2.5045e-01,  1.5462e-02, -1.0184e-02, -1.0184e-02,
        -1.0184e-02, -3.7285e-01, -1.0184e-02, -1.0184e-02, -1.0000e+09,
        -1.0184e-02, -3.7285e-01, -1.0184e-02, -1.0184e-02, -1.0000e+09,
        -1.0184e-02, -1.0184e-02, -1.0184e-02, -1.0184e-02, -1.0000e+09,
        -1.0184e-02, -3.7285e-01, -1.0184e-02, -1.0184e-02, -1.0000e+09,
        -1.0184e-02, -3.7285e-01, -1.0184e-02, -1.0184e-02, -1.0000e+09,
        -1.0184e-02, -3.7285e-01, -1.0184e-02, -1.0184e-02, -1.0000e+09,
        -1.0184e-02, -3.7285e-01, -1.0184e-02, -1.0184e-02, -1.0000e+09,
        -1.0184e-02, -3.7285e-01, -1.0184e-02, -1.0184e-02, -1.0000e+09,
        -1.0184e-02, -3.7285e-01

Tokens embeddings:
tensor([-1.0233e-02, -4.3200e-01, -1.8753e-01, -2.6926e-01, -1.0000e+09,
        -3.3221e-01, -4.3649e-01, -3.3942e-01, -1.4316e-01, -1.0000e+09,
        -9.0318e-02, -4.7442e-01, -4.3643e-02, -2.7828e-01, -1.0000e+09,
        -2.4218e-01, -3.8265e-01, -1.6655e-01, -1.7840e-01, -1.0000e+09,
        -2.8299e-01, -2.5045e-01,  1.5462e-02, -1.0184e-02, -1.0184e-02,
        -1.0184e-02, -3.7285e-01, -1.0184e-02, -1.0184e-02, -1.0000e+09,
        -1.0184e-02, -3.7285e-01, -1.0184e-02, -1.0184e-02, -1.0000e+09,
        -1.0184e-02, -1.0184e-02, -1.0184e-02, -1.0184e-02, -1.0000e+09,
        -1.0184e-02, -3.7285e-01, -1.0184e-02, -1.0184e-02, -1.0000e+09,
        -1.0184e-02, -3.7285e-01, -1.0184e-02, -1.0184e-02, -1.0000e+09,
        -1.0184e-02, -3.7285e-01, -1.0184e-02, -1.0184e-02, -1.0000e+09,
        -1.0184e-02, -3.7285e-01, -1.0184e-02, -1.0184e-02, -1.0000e+09,
        -1.0184e-02, -3.7285e-01, -1.0184e-02, -1.0184e-02, -1.0000e+09,
        -1.0184e-02, -3.7285e-01

Tokens embeddings:
tensor([-1.0233e-02, -4.3200e-01, -1.8753e-01, -2.6926e-01, -1.0000e+09,
        -3.3221e-01, -4.3649e-01, -3.3942e-01, -1.4316e-01, -1.0000e+09,
        -9.0318e-02, -4.7442e-01, -4.3643e-02, -2.7828e-01, -1.0000e+09,
        -2.4218e-01, -3.8265e-01, -1.6655e-01, -1.7840e-01, -1.0000e+09,
        -2.8299e-01, -2.5045e-01,  1.5462e-02, -1.0184e-02, -1.0184e-02,
        -1.0184e-02, -3.7285e-01, -1.0184e-02, -1.0184e-02, -1.0000e+09,
        -1.0184e-02, -3.7285e-01, -1.0184e-02, -1.0184e-02, -1.0000e+09,
        -1.0184e-02, -1.0184e-02, -1.0184e-02, -1.0184e-02, -1.0000e+09,
        -1.0184e-02, -3.7285e-01, -1.0184e-02, -1.0184e-02, -1.0000e+09,
        -1.0184e-02, -3.7285e-01, -1.0184e-02, -1.0184e-02, -1.0000e+09,
        -1.0184e-02, -3.7285e-01, -1.0184e-02, -1.0184e-02, -1.0000e+09,
        -1.0184e-02, -3.7285e-01, -1.0184e-02, -1.0184e-02, -1.0000e+09,
        -1.0184e-02, -3.7285e-01, -1.0184e-02, -1.0184e-02, -1.0000e+09,
        -1.0184e-02, -3.7285e-01

Tokens embeddings:
tensor([-1.0233e-02, -4.3200e-01, -1.8753e-01, -2.6926e-01, -1.0000e+09,
        -3.3221e-01, -4.3649e-01, -3.3942e-01, -1.4316e-01, -1.0000e+09,
        -9.0318e-02, -4.7442e-01, -4.3643e-02, -2.7828e-01, -1.0000e+09,
        -2.4218e-01, -3.8265e-01, -1.6655e-01, -1.7840e-01, -1.0000e+09,
        -2.8299e-01, -2.5045e-01,  1.5462e-02, -1.0184e-02, -1.0184e-02,
        -1.0184e-02, -3.7285e-01, -1.0184e-02, -1.0184e-02, -1.0000e+09,
        -1.0184e-02, -3.7285e-01, -1.0184e-02, -1.0184e-02, -1.0000e+09,
        -1.0184e-02, -1.0184e-02, -1.0184e-02, -1.0184e-02, -1.0000e+09,
        -1.0184e-02, -3.7285e-01, -1.0184e-02, -1.0184e-02, -1.0000e+09,
        -1.0184e-02, -3.7285e-01, -1.0184e-02, -1.0184e-02, -1.0000e+09,
        -1.0184e-02, -3.7285e-01, -1.0184e-02, -1.0184e-02, -1.0000e+09,
        -1.0184e-02, -3.7285e-01, -1.0184e-02, -1.0184e-02, -1.0000e+09,
        -1.0184e-02, -3.7285e-01, -1.0184e-02, -1.0184e-02, -1.0000e+09,
        -1.0184e-02, -3.7285e-01

Tokens embeddings:
tensor([-1.0233e-02, -4.3200e-01, -1.8753e-01, -2.6926e-01, -1.0000e+09,
        -3.3221e-01, -4.3649e-01, -3.3942e-01, -1.4316e-01, -1.0000e+09,
        -9.0318e-02, -4.7442e-01, -4.3643e-02, -2.7828e-01, -1.0000e+09,
        -2.4218e-01, -3.8265e-01, -1.6655e-01, -1.7840e-01, -1.0000e+09,
        -2.8299e-01, -2.5045e-01,  1.5462e-02, -1.0184e-02, -1.0184e-02,
        -1.0184e-02, -3.7285e-01, -1.0184e-02, -1.0184e-02, -1.0000e+09,
        -1.0184e-02, -3.7285e-01, -1.0184e-02, -1.0184e-02, -1.0000e+09,
        -1.0184e-02, -1.0184e-02, -1.0184e-02, -1.0184e-02, -1.0000e+09,
        -1.0184e-02, -3.7285e-01, -1.0184e-02, -1.0184e-02, -1.0000e+09,
        -1.0184e-02, -3.7285e-01, -1.0184e-02, -1.0184e-02, -1.0000e+09,
        -1.0184e-02, -3.7285e-01, -1.0184e-02, -1.0184e-02, -1.0000e+09,
        -1.0184e-02, -3.7285e-01, -1.0184e-02, -1.0184e-02, -1.0000e+09,
        -1.0184e-02, -3.7285e-01, -1.0184e-02, -1.0184e-02, -1.0000e+09,
        -1.0184e-02, -3.7285e-01

Tokens embeddings:
tensor([-1.0233e-02, -4.3200e-01, -1.8753e-01, -2.6926e-01, -1.0000e+09,
        -3.3221e-01, -4.3649e-01, -3.3942e-01, -1.4316e-01, -1.0000e+09,
        -9.0318e-02, -4.7442e-01, -4.3643e-02, -2.7828e-01, -1.0000e+09,
        -2.4218e-01, -3.8265e-01, -1.6655e-01, -1.7840e-01, -1.0000e+09,
        -2.8299e-01, -2.5045e-01,  1.5462e-02, -1.0184e-02, -1.0184e-02,
        -1.0184e-02, -3.7285e-01, -1.0184e-02, -1.0184e-02, -1.0000e+09,
        -1.0184e-02, -3.7285e-01, -1.0184e-02, -1.0184e-02, -1.0000e+09,
        -1.0184e-02, -1.0184e-02, -1.0184e-02, -1.0184e-02, -1.0000e+09,
        -1.0184e-02, -3.7285e-01, -1.0184e-02, -1.0184e-02, -1.0000e+09,
        -1.0184e-02, -3.7285e-01, -1.0184e-02, -1.0184e-02, -1.0000e+09,
        -1.0184e-02, -3.7285e-01, -1.0184e-02, -1.0184e-02, -1.0000e+09,
        -1.0184e-02, -3.7285e-01, -1.0184e-02, -1.0184e-02, -1.0000e+09,
        -1.0184e-02, -3.7285e-01, -1.0184e-02, -1.0184e-02, -1.0000e+09,
        -1.0184e-02, -3.7285e-01

Tokens embeddings:
tensor([-1.0233e-02, -4.3200e-01, -1.8753e-01, -2.6926e-01, -1.0000e+09,
        -3.3221e-01, -4.3649e-01, -3.3942e-01, -1.4316e-01, -1.0000e+09,
        -9.0318e-02, -4.7442e-01, -4.3643e-02, -2.7828e-01, -1.0000e+09,
        -2.4218e-01, -3.8265e-01, -1.6655e-01, -1.7840e-01, -1.0000e+09,
        -2.8299e-01, -2.5045e-01,  1.5462e-02, -1.0184e-02, -1.0184e-02,
        -1.0184e-02, -3.7285e-01, -1.0184e-02, -1.0184e-02, -1.0000e+09,
        -1.0184e-02, -3.7285e-01, -1.0184e-02, -1.0184e-02, -1.0000e+09,
        -1.0184e-02, -1.0184e-02, -1.0184e-02, -1.0184e-02, -1.0000e+09,
        -1.0184e-02, -3.7285e-01, -1.0184e-02, -1.0184e-02, -1.0000e+09,
        -1.0184e-02, -3.7285e-01, -1.0184e-02, -1.0184e-02, -1.0000e+09,
        -1.0184e-02, -3.7285e-01, -1.0184e-02, -1.0184e-02, -1.0000e+09,
        -1.0184e-02, -3.7285e-01, -1.0184e-02, -1.0184e-02, -1.0000e+09,
        -1.0184e-02, -3.7285e-01, -1.0184e-02, -1.0184e-02, -1.0000e+09,
        -1.0184e-02, -3.7285e-01

Tokens embeddings:
tensor([-1.0233e-02, -3.8022e-01, -1.0000e+09, -1.6359e-01, -3.5781e-01,
        -1.0000e+09, -6.7895e-02, -3.7863e-01, -1.0000e+09, -3.5217e-01,
        -2.2452e-01, -1.0000e+09, -1.7381e-01, -3.3419e-01, -1.0000e+09,
        -3.1394e-01, -3.5281e-01, -1.0000e+09, -2.6643e-01, -3.8420e-01,
        -1.0000e+09, -2.8463e-01, -4.4030e-01, -1.0000e+09, -1.0184e-02,
        -3.7285e-01, -1.0000e+09, -1.0184e-02, -3.7285e-01, -1.0000e+09,
        -1.0184e-02, -3.7285e-01, -1.0000e+09, -1.0184e-02, -3.7285e-01,
        -1.0000e+09, -1.0184e-02, -1.0184e-02, -1.0184e-02, -1.0184e-02,
        -1.0184e-02, -3.7285e-01, -1.0184e-02, -3.7285e-01, -1.0000e+09,
        -1.0184e-02, -3.7285e-01, -1.0000e+09, -1.0184e-02, -3.7285e-01,
        -1.0000e+09, -1.0184e-02, -3.7285e-01, -1.0000e+09, -1.0184e-02,
        -3.7285e-01, -1.0000e+09, -1.0184e-02, -3.7285e-01, -1.0000e+09,
        -1.0184e-02, -3.7285e-01, -1.0000e+09, -1.0184e-02, -3.7285e-01,
        -1.0000e+09, -1.0184e-02

Tokens embeddings:
tensor([-1.0233e-02, -3.8022e-01, -1.0000e+09, -1.6359e-01, -3.5781e-01,
        -1.0000e+09, -6.7895e-02, -3.7863e-01, -1.0000e+09, -3.5217e-01,
        -2.2452e-01, -1.0000e+09, -1.7381e-01, -3.3419e-01, -1.0000e+09,
        -3.1394e-01, -3.5281e-01, -1.0000e+09, -2.6643e-01, -3.8420e-01,
        -1.0000e+09, -2.8463e-01, -4.4030e-01, -1.0000e+09, -1.0184e-02,
        -3.7285e-01, -1.0000e+09, -1.0184e-02, -3.7285e-01, -1.0000e+09,
        -1.0184e-02, -3.7285e-01, -1.0000e+09, -1.0184e-02, -3.7285e-01,
        -1.0000e+09, -1.0184e-02, -1.0184e-02, -1.0184e-02, -1.0184e-02,
        -1.0184e-02, -3.7285e-01, -1.0184e-02, -3.7285e-01, -1.0000e+09,
        -1.0184e-02, -3.7285e-01, -1.0000e+09, -1.0184e-02, -3.7285e-01,
        -1.0000e+09, -1.0184e-02, -3.7285e-01, -1.0000e+09, -1.0184e-02,
        -3.7285e-01, -1.0000e+09, -1.0184e-02, -3.7285e-01, -1.0000e+09,
        -1.0184e-02, -3.7285e-01, -1.0000e+09, -1.0184e-02, -3.7285e-01,
        -1.0000e+09, -1.0184e-02

Tokens embeddings:
tensor([-1.0233e-02, -3.8022e-01, -1.0000e+09, -1.6359e-01, -3.5781e-01,
        -1.0000e+09, -6.7895e-02, -3.7863e-01, -1.0000e+09, -3.5217e-01,
        -2.2452e-01, -1.0000e+09, -1.7381e-01, -3.3419e-01, -1.0000e+09,
        -3.1394e-01, -3.5281e-01, -1.0000e+09, -2.6643e-01, -3.8420e-01,
        -1.0000e+09, -2.8463e-01, -4.4030e-01, -1.0000e+09, -1.0184e-02,
        -3.7285e-01, -1.0000e+09, -1.0184e-02, -3.7285e-01, -1.0000e+09,
        -1.0184e-02, -3.7285e-01, -1.0000e+09, -1.0184e-02, -3.7285e-01,
        -1.0000e+09, -1.0184e-02, -1.0184e-02, -1.0184e-02, -1.0184e-02,
        -1.0184e-02, -3.7285e-01, -1.0184e-02, -3.7285e-01, -1.0000e+09,
        -1.0184e-02, -3.7285e-01, -1.0000e+09, -1.0184e-02, -3.7285e-01,
        -1.0000e+09, -1.0184e-02, -3.7285e-01, -1.0000e+09, -1.0184e-02,
        -3.7285e-01, -1.0000e+09, -1.0184e-02, -3.7285e-01, -1.0000e+09,
        -1.0184e-02, -3.7285e-01, -1.0000e+09, -1.0184e-02, -3.7285e-01,
        -1.0000e+09, -1.0184e-02

Tokens embeddings:
tensor([-1.0233e-02, -3.8022e-01, -1.0000e+09, -1.6359e-01, -3.5781e-01,
        -1.0000e+09, -6.7895e-02, -3.7863e-01, -1.0000e+09, -3.5217e-01,
        -2.2452e-01, -1.0000e+09, -1.7381e-01, -3.3419e-01, -1.0000e+09,
        -3.1394e-01, -3.5281e-01, -1.0000e+09, -2.6643e-01, -3.8420e-01,
        -1.0000e+09, -2.8463e-01, -4.4030e-01, -1.0000e+09, -1.0184e-02,
        -3.7285e-01, -1.0000e+09, -1.0184e-02, -3.7285e-01, -1.0000e+09,
        -1.0184e-02, -3.7285e-01, -1.0000e+09, -1.0184e-02, -3.7285e-01,
        -1.0000e+09, -1.0184e-02, -1.0184e-02, -1.0184e-02, -1.0184e-02,
        -1.0184e-02, -3.7285e-01, -1.0184e-02, -3.7285e-01, -1.0000e+09,
        -1.0184e-02, -3.7285e-01, -1.0000e+09, -1.0184e-02, -3.7285e-01,
        -1.0000e+09, -1.0184e-02, -3.7285e-01, -1.0000e+09, -1.0184e-02,
        -3.7285e-01, -1.0000e+09, -1.0184e-02, -3.7285e-01, -1.0000e+09,
        -1.0184e-02, -3.7285e-01, -1.0000e+09, -1.0184e-02, -3.7285e-01,
        -1.0000e+09, -1.0184e-02

Tokens embeddings:
tensor([-1.0233e-02, -3.8022e-01, -1.0000e+09, -1.6359e-01, -3.5781e-01,
        -1.0000e+09, -6.7895e-02, -3.7863e-01, -1.0000e+09, -3.5217e-01,
        -2.2452e-01, -1.0000e+09, -1.7381e-01, -3.3419e-01, -1.0000e+09,
        -3.1394e-01, -3.5281e-01, -1.0000e+09, -2.6643e-01, -3.8420e-01,
        -1.0000e+09, -2.8463e-01, -4.4030e-01, -1.0000e+09, -1.0184e-02,
        -3.7285e-01, -1.0000e+09, -1.0184e-02, -3.7285e-01, -1.0000e+09,
        -1.0184e-02, -3.7285e-01, -1.0000e+09, -1.0184e-02, -3.7285e-01,
        -1.0000e+09, -1.0184e-02, -1.0184e-02, -1.0184e-02, -1.0184e-02,
        -1.0184e-02, -3.7285e-01, -1.0184e-02, -3.7285e-01, -1.0000e+09,
        -1.0184e-02, -3.7285e-01, -1.0000e+09, -1.0184e-02, -3.7285e-01,
        -1.0000e+09, -1.0184e-02, -3.7285e-01, -1.0000e+09, -1.0184e-02,
        -3.7285e-01, -1.0000e+09, -1.0184e-02, -3.7285e-01, -1.0000e+09,
        -1.0184e-02, -3.7285e-01, -1.0000e+09, -1.0184e-02, -3.7285e-01,
        -1.0000e+09, -1.0184e-02

Tokens embeddings:
tensor([-1.0233e-02, -3.8022e-01, -1.0000e+09, -1.6359e-01, -3.5781e-01,
        -1.0000e+09, -6.7895e-02, -3.7863e-01, -1.0000e+09, -3.5217e-01,
        -2.2452e-01, -1.0000e+09, -1.7381e-01, -3.3419e-01, -1.0000e+09,
        -3.1394e-01, -3.5281e-01, -1.0000e+09, -2.6643e-01, -3.8420e-01,
        -1.0000e+09, -2.8463e-01, -4.4030e-01, -1.0000e+09, -1.0184e-02,
        -3.7285e-01, -1.0000e+09, -1.0184e-02, -3.7285e-01, -1.0000e+09,
        -1.0184e-02, -3.7285e-01, -1.0000e+09, -1.0184e-02, -3.7285e-01,
        -1.0000e+09, -1.0184e-02, -1.0184e-02, -1.0184e-02, -1.0184e-02,
        -1.0184e-02, -3.7285e-01, -1.0184e-02, -3.7285e-01, -1.0000e+09,
        -1.0184e-02, -3.7285e-01, -1.0000e+09, -1.0184e-02, -3.7285e-01,
        -1.0000e+09, -1.0184e-02, -3.7285e-01, -1.0000e+09, -1.0184e-02,
        -3.7285e-01, -1.0000e+09, -1.0184e-02, -3.7285e-01, -1.0000e+09,
        -1.0184e-02, -3.7285e-01, -1.0000e+09, -1.0184e-02, -3.7285e-01,
        -1.0000e+09, -1.0184e-02

Tokens embeddings:
tensor([-1.0233e-02, -3.8022e-01, -1.0000e+09, -1.6359e-01, -3.5781e-01,
        -1.0000e+09, -6.7895e-02, -3.7863e-01, -1.0000e+09, -3.5217e-01,
        -2.2452e-01, -1.0000e+09, -1.7381e-01, -3.3419e-01, -1.0000e+09,
        -3.1394e-01, -3.5281e-01, -1.0000e+09, -2.6643e-01, -3.8420e-01,
        -1.0000e+09, -2.8463e-01, -4.4030e-01, -1.0000e+09, -1.0184e-02,
        -3.7285e-01, -1.0000e+09, -1.0184e-02, -3.7285e-01, -1.0000e+09,
        -1.0184e-02, -3.7285e-01, -1.0000e+09, -1.0184e-02, -3.7285e-01,
        -1.0000e+09, -1.0184e-02, -1.0184e-02, -1.0184e-02, -1.0184e-02,
        -1.0184e-02, -3.7285e-01, -1.0184e-02, -3.7285e-01, -1.0000e+09,
        -1.0184e-02, -3.7285e-01, -1.0000e+09, -1.0184e-02, -3.7285e-01,
        -1.0000e+09, -1.0184e-02, -3.7285e-01, -1.0000e+09, -1.0184e-02,
        -3.7285e-01, -1.0000e+09, -1.0184e-02, -3.7285e-01, -1.0000e+09,
        -1.0184e-02, -3.7285e-01, -1.0000e+09, -1.0184e-02, -3.7285e-01,
        -1.0000e+09, -1.0184e-02

Tokens embeddings:
tensor([-1.0233e-02, -3.8022e-01, -1.0000e+09, -1.6359e-01, -3.5781e-01,
        -1.0000e+09, -6.7895e-02, -3.7863e-01, -1.0000e+09, -3.5217e-01,
        -2.2452e-01, -1.0000e+09, -1.7381e-01, -3.3419e-01, -1.0000e+09,
        -3.1394e-01, -3.5281e-01, -1.0000e+09, -2.6643e-01, -3.8420e-01,
        -1.0000e+09, -2.8463e-01, -4.4030e-01, -1.0000e+09, -1.0184e-02,
        -3.7285e-01, -1.0000e+09, -1.0184e-02, -3.7285e-01, -1.0000e+09,
        -1.0184e-02, -3.7285e-01, -1.0000e+09, -1.0184e-02, -3.7285e-01,
        -1.0000e+09, -1.0184e-02, -1.0184e-02, -1.0184e-02, -1.0184e-02,
        -1.0184e-02, -3.7285e-01, -1.0184e-02, -3.7285e-01, -1.0000e+09,
        -1.0184e-02, -3.7285e-01, -1.0000e+09, -1.0184e-02, -3.7285e-01,
        -1.0000e+09, -1.0184e-02, -3.7285e-01, -1.0000e+09, -1.0184e-02,
        -3.7285e-01, -1.0000e+09, -1.0184e-02, -3.7285e-01, -1.0000e+09,
        -1.0184e-02, -3.7285e-01, -1.0000e+09, -1.0184e-02, -3.7285e-01,
        -1.0000e+09, -1.0184e-02

Tokens embeddings:
tensor([-1.0233e-02, -3.8022e-01, -1.0000e+09, -1.6359e-01, -3.5781e-01,
        -1.0000e+09, -6.7895e-02, -3.7863e-01, -1.0000e+09, -3.5217e-01,
        -2.2452e-01, -1.0000e+09, -1.7381e-01, -3.3419e-01, -1.0000e+09,
        -3.1394e-01, -3.5281e-01, -1.0000e+09, -2.6643e-01, -3.8420e-01,
        -1.0000e+09, -2.8463e-01, -4.4030e-01, -1.0000e+09, -1.0184e-02,
        -3.7285e-01, -1.0000e+09, -1.0184e-02, -3.7285e-01, -1.0000e+09,
        -1.0184e-02, -3.7285e-01, -1.0000e+09, -1.0184e-02, -3.7285e-01,
        -1.0000e+09, -1.0184e-02, -1.0184e-02, -1.0184e-02, -1.0184e-02,
        -1.0184e-02, -3.7285e-01, -1.0184e-02, -3.7285e-01, -1.0000e+09,
        -1.0184e-02, -3.7285e-01, -1.0000e+09, -1.0184e-02, -3.7285e-01,
        -1.0000e+09, -1.0184e-02, -3.7285e-01, -1.0000e+09, -1.0184e-02,
        -3.7285e-01, -1.0000e+09, -1.0184e-02, -3.7285e-01, -1.0000e+09,
        -1.0184e-02, -3.7285e-01, -1.0000e+09, -1.0184e-02, -3.7285e-01,
        -1.0000e+09, -1.0184e-02

Tokens embeddings:
tensor([-1.0233e-02, -3.8022e-01, -1.0000e+09, -1.6359e-01, -3.5781e-01,
        -1.0000e+09, -6.7895e-02, -3.7863e-01, -1.0000e+09, -3.5217e-01,
        -2.2452e-01, -1.0000e+09, -1.7381e-01, -3.3419e-01, -1.0000e+09,
        -3.1394e-01, -3.5281e-01, -1.0000e+09, -2.6643e-01, -3.8420e-01,
        -1.0000e+09, -2.8463e-01, -4.4030e-01, -1.0000e+09, -1.0184e-02,
        -3.7285e-01, -1.0000e+09, -1.0184e-02, -3.7285e-01, -1.0000e+09,
        -1.0184e-02, -3.7285e-01, -1.0000e+09, -1.0184e-02, -3.7285e-01,
        -1.0000e+09, -1.0184e-02, -1.0184e-02, -1.0184e-02, -1.0184e-02,
        -1.0184e-02, -3.7285e-01, -1.0184e-02, -3.7285e-01, -1.0000e+09,
        -1.0184e-02, -3.7285e-01, -1.0000e+09, -1.0184e-02, -3.7285e-01,
        -1.0000e+09, -1.0184e-02, -3.7285e-01, -1.0000e+09, -1.0184e-02,
        -3.7285e-01, -1.0000e+09, -1.0184e-02, -3.7285e-01, -1.0000e+09,
        -1.0184e-02, -3.7285e-01, -1.0000e+09, -1.0184e-02, -3.7285e-01,
        -1.0000e+09, -1.0184e-02

Tokens embeddings:
tensor([-1.0233e-02, -3.8022e-01, -1.0000e+09, -1.6359e-01, -3.5781e-01,
        -1.0000e+09, -6.7895e-02, -3.7863e-01, -1.0000e+09, -3.5217e-01,
        -2.2452e-01, -1.0000e+09, -1.7381e-01, -3.3419e-01, -1.0000e+09,
        -3.1394e-01, -3.5281e-01, -1.0000e+09, -2.6643e-01, -3.8420e-01,
        -1.0000e+09, -2.8463e-01, -4.4030e-01, -1.0000e+09, -1.0184e-02,
        -3.7285e-01, -1.0000e+09, -1.0184e-02, -3.7285e-01, -1.0000e+09,
        -1.0184e-02, -3.7285e-01, -1.0000e+09, -1.0184e-02, -3.7285e-01,
        -1.0000e+09, -1.0184e-02, -1.0184e-02, -1.0184e-02, -1.0184e-02,
        -1.0184e-02, -3.7285e-01, -1.0184e-02, -3.7285e-01, -1.0000e+09,
        -1.0184e-02, -3.7285e-01, -1.0000e+09, -1.0184e-02, -3.7285e-01,
        -1.0000e+09, -1.0184e-02, -3.7285e-01, -1.0000e+09, -1.0184e-02,
        -3.7285e-01, -1.0000e+09, -1.0184e-02, -3.7285e-01, -1.0000e+09,
        -1.0184e-02, -3.7285e-01, -1.0000e+09, -1.0184e-02, -3.7285e-01,
        -1.0000e+09, -1.0184e-02

Tokens embeddings:
tensor([-1.0233e-02, -3.8022e-01, -1.0000e+09, -1.6359e-01, -3.5781e-01,
        -1.0000e+09, -6.7895e-02, -3.7863e-01, -1.0000e+09, -3.5217e-01,
        -2.2452e-01, -1.0000e+09, -1.7381e-01, -3.3419e-01, -1.0000e+09,
        -3.1394e-01, -3.5281e-01, -1.0000e+09, -2.6643e-01, -3.8420e-01,
        -1.0000e+09, -2.8463e-01, -4.4030e-01, -1.0000e+09, -1.0184e-02,
        -3.7285e-01, -1.0000e+09, -1.0184e-02, -3.7285e-01, -1.0000e+09,
        -1.0184e-02, -3.7285e-01, -1.0000e+09, -1.0184e-02, -3.7285e-01,
        -1.0000e+09, -1.0184e-02, -1.0184e-02, -1.0184e-02, -1.0184e-02,
        -1.0184e-02, -3.7285e-01, -1.0184e-02, -3.7285e-01, -1.0000e+09,
        -1.0184e-02, -3.7285e-01, -1.0000e+09, -1.0184e-02, -3.7285e-01,
        -1.0000e+09, -1.0184e-02, -3.7285e-01, -1.0000e+09, -1.0184e-02,
        -3.7285e-01, -1.0000e+09, -1.0184e-02, -3.7285e-01, -1.0000e+09,
        -1.0184e-02, -3.7285e-01, -1.0000e+09, -1.0184e-02, -3.7285e-01,
        -1.0000e+09, -1.0184e-02

Tokens embeddings:
tensor([-0.0102, -0.3897, -0.0903, -0.2982, -0.3092, -0.2428, -0.1557, -0.1759,
        -0.2991, -0.3139, -0.3522, -0.2933, -0.2074, -0.2954, -0.3098, -0.2897,
        -0.3270, -0.2761, -0.2602, -0.3121, -0.3522, -0.2835, -0.3139, -0.3251,
        -0.3522, -0.2951,  0.0443,  0.0443,  0.0443,  0.0155, -0.0102, -0.0102,
        -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102,
        -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102,
        -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102,
        -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102,
        -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102,
        -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102,
        -0.0102, -0.0102, -0.0102, -0.0102, -0.0102])
arg max of tensor([-0.0102, -0.3897, -0.0903, -0.2982, -0.3092, -0.2428, -0.1557, -0.1759,
        -0.2991, -0.3139, -0.3522, -

Tokens embeddings:
tensor([-0.0102, -0.3897, -0.0903, -0.2982, -0.3092, -0.2428, -0.1557, -0.1759,
        -0.2991, -0.3139, -0.3522, -0.2933, -0.2074, -0.2954, -0.3098, -0.2897,
        -0.3270, -0.2761, -0.2602, -0.3121, -0.3522, -0.2835, -0.3139, -0.3251,
        -0.3522, -0.2951,  0.0443,  0.0443,  0.0443,  0.0155, -0.0102, -0.0102,
        -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102,
        -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102,
        -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102,
        -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102,
        -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102,
        -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102,
        -0.0102, -0.0102, -0.0102, -0.0102, -0.0102])
arg max of tensor([-0.0102, -0.3897, -0.0903, -0.2982, -0.3092, -0.2428, -0.1557, -0.1759,
        -0.2991, -0.3139, -0.3522, -

Tokens embeddings:
tensor([-0.0102, -0.3897, -0.0903, -0.2982, -0.3092, -0.2428, -0.1557, -0.1759,
        -0.2991, -0.3139, -0.3522, -0.2933, -0.2074, -0.2954, -0.3098, -0.2897,
        -0.3270, -0.2761, -0.2602, -0.3121, -0.3522, -0.2835, -0.3139, -0.3251,
        -0.3522, -0.2951,  0.0443,  0.0443,  0.0443,  0.0155, -0.0102, -0.0102,
        -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102,
        -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102,
        -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102,
        -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102,
        -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102,
        -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102,
        -0.0102, -0.0102, -0.0102, -0.0102, -0.0102])
arg max of tensor([-0.0102, -0.3897, -0.0903, -0.2982, -0.3092, -0.2428, -0.1557, -0.1759,
        -0.2991, -0.3139, -0.3522, -

Tokens embeddings:
tensor([-0.0102, -0.3897, -0.0903, -0.2982, -0.3092, -0.2428, -0.1557, -0.1759,
        -0.2991, -0.3139, -0.3522, -0.2933, -0.2074, -0.2954, -0.3098, -0.2897,
        -0.3270, -0.2761, -0.2602, -0.3121, -0.3522, -0.2835, -0.3139, -0.3251,
        -0.3522, -0.2951,  0.0443,  0.0443,  0.0443,  0.0155, -0.0102, -0.0102,
        -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102,
        -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102,
        -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102,
        -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102,
        -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102,
        -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102,
        -0.0102, -0.0102, -0.0102, -0.0102, -0.0102])
arg max of tensor([-0.0102, -0.3897, -0.0903, -0.2982, -0.3092, -0.2428, -0.1557, -0.1759,
        -0.2991, -0.3139, -0.3522, -

Tokens embeddings:
tensor([-0.0102, -0.3897, -0.0903, -0.2982, -0.3092, -0.2428, -0.1557, -0.1759,
        -0.2991, -0.3139, -0.3522, -0.2933, -0.2074, -0.2954, -0.3098, -0.2897,
        -0.3270, -0.2761, -0.2602, -0.3121, -0.3522, -0.2835, -0.3139, -0.3251,
        -0.3522, -0.2951,  0.0443,  0.0443,  0.0443,  0.0155, -0.0102, -0.0102,
        -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102,
        -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102,
        -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102,
        -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102,
        -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102,
        -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102,
        -0.0102, -0.0102, -0.0102, -0.0102, -0.0102])
arg max of tensor([-0.0102, -0.3897, -0.0903, -0.2982, -0.3092, -0.2428, -0.1557, -0.1759,
        -0.2991, -0.3139, -0.3522, -

Tokens embeddings:
tensor([-0.0102, -0.3897, -0.0903, -0.2982, -0.3092, -0.2428, -0.1557, -0.1759,
        -0.2991, -0.3139, -0.3522, -0.2933, -0.2074, -0.2954, -0.3098, -0.2897,
        -0.3270, -0.2761, -0.2602, -0.3121, -0.3522, -0.2835, -0.3139, -0.3251,
        -0.3522, -0.2951,  0.0443,  0.0443,  0.0443,  0.0155, -0.0102, -0.0102,
        -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102,
        -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102,
        -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102,
        -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102,
        -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102,
        -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102,
        -0.0102, -0.0102, -0.0102, -0.0102, -0.0102])
arg max of tensor([-0.0102, -0.3897, -0.0903, -0.2982, -0.3092, -0.2428, -0.1557, -0.1759,
        -0.2991, -0.3139, -0.3522, -

Tokens embeddings:
tensor([-0.0102, -0.3897, -0.0903, -0.2982, -0.3092, -0.2428, -0.1557, -0.1759,
        -0.2991, -0.3139, -0.3522, -0.2933, -0.2074, -0.2954, -0.3098, -0.2897,
        -0.3270, -0.2761, -0.2602, -0.3121, -0.3522, -0.2835, -0.3139, -0.3251,
        -0.3522, -0.2951,  0.0443,  0.0443,  0.0443,  0.0155, -0.0102, -0.0102,
        -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102,
        -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102,
        -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102,
        -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102,
        -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102,
        -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102,
        -0.0102, -0.0102, -0.0102, -0.0102, -0.0102])
arg max of tensor([-0.0102, -0.3897, -0.0903, -0.2982, -0.3092, -0.2428, -0.1557, -0.1759,
        -0.2991, -0.3139, -0.3522, -

Tokens embeddings:
tensor([-0.0102, -0.3897, -0.0903, -0.2982, -0.3092, -0.2428, -0.1557, -0.1759,
        -0.2991, -0.3139, -0.3522, -0.2933, -0.2074, -0.2954, -0.3098, -0.2897,
        -0.3270, -0.2761, -0.2602, -0.3121, -0.3522, -0.2835, -0.3139, -0.3251,
        -0.3522, -0.2951,  0.0443,  0.0443,  0.0443,  0.0155, -0.0102, -0.0102,
        -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102,
        -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102,
        -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102,
        -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102,
        -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102,
        -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102,
        -0.0102, -0.0102, -0.0102, -0.0102, -0.0102])
arg max of tensor([-0.0102, -0.3897, -0.0903, -0.2982, -0.3092, -0.2428, -0.1557, -0.1759,
        -0.2991, -0.3139, -0.3522, -

Tokens embeddings:
tensor([-0.0102, -0.3897, -0.0903, -0.2982, -0.3092, -0.2428, -0.1557, -0.1759,
        -0.2991, -0.3139, -0.3522, -0.2933, -0.2074, -0.2954, -0.3098, -0.2897,
        -0.3270, -0.2761, -0.2602, -0.3121, -0.3522, -0.2835, -0.3139, -0.3251,
        -0.3522, -0.2951,  0.0443,  0.0443,  0.0443,  0.0155, -0.0102, -0.0102,
        -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102,
        -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102,
        -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102,
        -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102,
        -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102,
        -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102,
        -0.0102, -0.0102, -0.0102, -0.0102, -0.0102])
arg max of tensor([-0.0102, -0.3897, -0.0903, -0.2982, -0.3092, -0.2428, -0.1557, -0.1759,
        -0.2991, -0.3139, -0.3522, -

Tokens embeddings:
tensor([-0.0102, -0.3897, -0.0903, -0.2982, -0.3092, -0.2428, -0.1557, -0.1759,
        -0.2991, -0.3139, -0.3522, -0.2933, -0.2074, -0.2954, -0.3098, -0.2897,
        -0.3270, -0.2761, -0.2602, -0.3121, -0.3522, -0.2835, -0.3139, -0.3251,
        -0.3522, -0.2951,  0.0443,  0.0443,  0.0443,  0.0155, -0.0102, -0.0102,
        -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102,
        -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102,
        -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102,
        -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102,
        -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102,
        -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102,
        -0.0102, -0.0102, -0.0102, -0.0102, -0.0102])
arg max of tensor([-0.0102, -0.3897, -0.0903, -0.2982, -0.3092, -0.2428, -0.1557, -0.1759,
        -0.2991, -0.3139, -0.3522, -

Tokens embeddings:
tensor([-0.0102, -0.3897, -0.0903, -0.2982, -0.3092, -0.2428, -0.1557, -0.1759,
        -0.2991, -0.3139, -0.3522, -0.2933, -0.2074, -0.2954, -0.3098, -0.2897,
        -0.3270, -0.2761, -0.2602, -0.3121, -0.3522, -0.2835, -0.3139, -0.3251,
        -0.3522, -0.2951,  0.0443,  0.0443,  0.0443,  0.0155, -0.0102, -0.0102,
        -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102,
        -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102,
        -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102,
        -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102,
        -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102,
        -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102,
        -0.0102, -0.0102, -0.0102, -0.0102, -0.0102])
arg max of tensor([-0.0102, -0.3897, -0.0903, -0.2982, -0.3092, -0.2428, -0.1557, -0.1759,
        -0.2991, -0.3139, -0.3522, -

Tokens embeddings:
tensor([-0.0102, -0.3897, -0.0903, -0.2982, -0.3092, -0.2428, -0.1557, -0.1759,
        -0.2991, -0.3139, -0.3522, -0.2933, -0.2074, -0.2954, -0.3098, -0.2897,
        -0.3270, -0.2761, -0.2602, -0.3121, -0.3522, -0.2835, -0.3139, -0.3251,
        -0.3522, -0.2951,  0.0443,  0.0443,  0.0443,  0.0155, -0.0102, -0.0102,
        -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102,
        -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102,
        -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102,
        -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102,
        -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102,
        -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102,
        -0.0102, -0.0102, -0.0102, -0.0102, -0.0102])
arg max of tensor([-0.0102, -0.3897, -0.0903, -0.2982, -0.3092, -0.2428, -0.1557, -0.1759,
        -0.2991, -0.3139, -0.3522, -

Tokens embeddings:
tensor([-0.0102, -0.3429, -0.3199, -0.3074, -0.2498, -0.2734, -0.1557, -0.1939,
        -0.2742, -0.2305, -0.2395, -0.2866, -0.2761, -0.2602, -0.3139, -0.2751,
        -0.2305, -0.3522, -0.2275, -0.3917, -0.2815, -0.2830, -0.3429, -0.3215,
        -0.0903, -0.3286, -0.2428, -0.1557, -0.1759, -0.2148, -0.3522, -0.3356,
        -0.1665, -0.2964,  0.0155, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102,
        -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102,
        -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102,
        -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102,
        -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102,
        -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102,
        -0.0102, -0.0102, -0.0102, -0.0102, -0.0102])
arg max of tensor([-0.0102, -0.3429, -0.3199, -0.3074, -0.2498, -0.2734, -0.1557, -0.1939,
        -0.2742, -0.2305, -0.2395, -

Tokens embeddings:
tensor([-0.0102, -0.3429, -0.3199, -0.3074, -0.2498, -0.2734, -0.1557, -0.1939,
        -0.2742, -0.2305, -0.2395, -0.2866, -0.2761, -0.2602, -0.3139, -0.2751,
        -0.2305, -0.3522, -0.2275, -0.3917, -0.2815, -0.2830, -0.3429, -0.3215,
        -0.0903, -0.3286, -0.2428, -0.1557, -0.1759, -0.2148, -0.3522, -0.3356,
        -0.1665, -0.2964,  0.0155, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102,
        -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102,
        -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102,
        -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102,
        -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102,
        -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102,
        -0.0102, -0.0102, -0.0102, -0.0102, -0.0102])
arg max of tensor([-0.0102, -0.3429, -0.3199, -0.3074, -0.2498, -0.2734, -0.1557, -0.1939,
        -0.2742, -0.2305, -0.2395, -

Tokens embeddings:
tensor([-0.0102, -0.3429, -0.3199, -0.3074, -0.2498, -0.2734, -0.1557, -0.1939,
        -0.2742, -0.2305, -0.2395, -0.2866, -0.2761, -0.2602, -0.3139, -0.2751,
        -0.2305, -0.3522, -0.2275, -0.3917, -0.2815, -0.2830, -0.3429, -0.3215,
        -0.0903, -0.3286, -0.2428, -0.1557, -0.1759, -0.2148, -0.3522, -0.3356,
        -0.1665, -0.2964,  0.0155, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102,
        -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102,
        -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102,
        -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102,
        -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102,
        -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102,
        -0.0102, -0.0102, -0.0102, -0.0102, -0.0102])
arg max of tensor([-0.0102, -0.3429, -0.3199, -0.3074, -0.2498, -0.2734, -0.1557, -0.1939,
        -0.2742, -0.2305, -0.2395, -

Tokens embeddings:
tensor([-0.0102, -0.3429, -0.3199, -0.3074, -0.2498, -0.2734, -0.1557, -0.1939,
        -0.2742, -0.2305, -0.2395, -0.2866, -0.2761, -0.2602, -0.3139, -0.2751,
        -0.2305, -0.3522, -0.2275, -0.3917, -0.2815, -0.2830, -0.3429, -0.3215,
        -0.0903, -0.3286, -0.2428, -0.1557, -0.1759, -0.2148, -0.3522, -0.3356,
        -0.1665, -0.2964,  0.0155, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102,
        -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102,
        -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102,
        -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102,
        -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102,
        -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102,
        -0.0102, -0.0102, -0.0102, -0.0102, -0.0102])
arg max of tensor([-0.0102, -0.3429, -0.3199, -0.3074, -0.2498, -0.2734, -0.1557, -0.1939,
        -0.2742, -0.2305, -0.2395, -

Tokens embeddings:
tensor([-0.0102, -0.3429, -0.3199, -0.3074, -0.2498, -0.2734, -0.1557, -0.1939,
        -0.2742, -0.2305, -0.2395, -0.2866, -0.2761, -0.2602, -0.3139, -0.2751,
        -0.2305, -0.3522, -0.2275, -0.3917, -0.2815, -0.2830, -0.3429, -0.3215,
        -0.0903, -0.3286, -0.2428, -0.1557, -0.1759, -0.2148, -0.3522, -0.3356,
        -0.1665, -0.2964,  0.0155, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102,
        -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102,
        -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102,
        -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102,
        -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102,
        -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102,
        -0.0102, -0.0102, -0.0102, -0.0102, -0.0102])
arg max of tensor([-0.0102, -0.3429, -0.3199, -0.3074, -0.2498, -0.2734, -0.1557, -0.1939,
        -0.2742, -0.2305, -0.2395, -

Tokens embeddings:
tensor([-0.0102, -0.3429, -0.3199, -0.3074, -0.2498, -0.2734, -0.1557, -0.1939,
        -0.2742, -0.2305, -0.2395, -0.2866, -0.2761, -0.2602, -0.3139, -0.2751,
        -0.2305, -0.3522, -0.2275, -0.3917, -0.2815, -0.2830, -0.3429, -0.3215,
        -0.0903, -0.3286, -0.2428, -0.1557, -0.1759, -0.2148, -0.3522, -0.3356,
        -0.1665, -0.2964,  0.0155, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102,
        -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102,
        -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102,
        -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102,
        -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102,
        -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102,
        -0.0102, -0.0102, -0.0102, -0.0102, -0.0102])
arg max of tensor([-0.0102, -0.3429, -0.3199, -0.3074, -0.2498, -0.2734, -0.1557, -0.1939,
        -0.2742, -0.2305, -0.2395, -

Tokens embeddings:
tensor([-0.0102, -0.3429, -0.3199, -0.3074, -0.2498, -0.2734, -0.1557, -0.1939,
        -0.2742, -0.2305, -0.2395, -0.2866, -0.2761, -0.2602, -0.3139, -0.2751,
        -0.2305, -0.3522, -0.2275, -0.3917, -0.2815, -0.2830, -0.3429, -0.3215,
        -0.0903, -0.3286, -0.2428, -0.1557, -0.1759, -0.2148, -0.3522, -0.3356,
        -0.1665, -0.2964,  0.0155, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102,
        -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102,
        -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102,
        -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102,
        -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102,
        -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102,
        -0.0102, -0.0102, -0.0102, -0.0102, -0.0102])
arg max of tensor([-0.0102, -0.3429, -0.3199, -0.3074, -0.2498, -0.2734, -0.1557, -0.1939,
        -0.2742, -0.2305, -0.2395, -

Tokens embeddings:
tensor([-0.0102, -0.3429, -0.3199, -0.3074, -0.2498, -0.2734, -0.1557, -0.1939,
        -0.2742, -0.2305, -0.2395, -0.2866, -0.2761, -0.2602, -0.3139, -0.2751,
        -0.2305, -0.3522, -0.2275, -0.3917, -0.2815, -0.2830, -0.3429, -0.3215,
        -0.0903, -0.3286, -0.2428, -0.1557, -0.1759, -0.2148, -0.3522, -0.3356,
        -0.1665, -0.2964,  0.0155, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102,
        -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102,
        -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102,
        -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102,
        -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102,
        -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102,
        -0.0102, -0.0102, -0.0102, -0.0102, -0.0102])
arg max of tensor([-0.0102, -0.3429, -0.3199, -0.3074, -0.2498, -0.2734, -0.1557, -0.1939,
        -0.2742, -0.2305, -0.2395, -

Tokens embeddings:
tensor([-0.0102, -0.3429, -0.3199, -0.3074, -0.2498, -0.2734, -0.1557, -0.1939,
        -0.2742, -0.2305, -0.2395, -0.2866, -0.2761, -0.2602, -0.3139, -0.2751,
        -0.2305, -0.3522, -0.2275, -0.3917, -0.2815, -0.2830, -0.3429, -0.3215,
        -0.0903, -0.3286, -0.2428, -0.1557, -0.1759, -0.2148, -0.3522, -0.3356,
        -0.1665, -0.2964,  0.0155, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102,
        -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102,
        -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102,
        -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102,
        -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102,
        -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102,
        -0.0102, -0.0102, -0.0102, -0.0102, -0.0102])
arg max of tensor([-0.0102, -0.3429, -0.3199, -0.3074, -0.2498, -0.2734, -0.1557, -0.1939,
        -0.2742, -0.2305, -0.2395, -

Tokens embeddings:
tensor([-0.0102, -0.3429, -0.3199, -0.3074, -0.2498, -0.2734, -0.1557, -0.1939,
        -0.2742, -0.2305, -0.2395, -0.2866, -0.2761, -0.2602, -0.3139, -0.2751,
        -0.2305, -0.3522, -0.2275, -0.3917, -0.2815, -0.2830, -0.3429, -0.3215,
        -0.0903, -0.3286, -0.2428, -0.1557, -0.1759, -0.2148, -0.3522, -0.3356,
        -0.1665, -0.2964,  0.0155, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102,
        -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102,
        -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102,
        -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102,
        -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102,
        -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102,
        -0.0102, -0.0102, -0.0102, -0.0102, -0.0102])
arg max of tensor([-0.0102, -0.3429, -0.3199, -0.3074, -0.2498, -0.2734, -0.1557, -0.1939,
        -0.2742, -0.2305, -0.2395, -

Tokens embeddings:
tensor([-0.0102, -0.3429, -0.3199, -0.3074, -0.2498, -0.2734, -0.1557, -0.1939,
        -0.2742, -0.2305, -0.2395, -0.2866, -0.2761, -0.2602, -0.3139, -0.2751,
        -0.2305, -0.3522, -0.2275, -0.3917, -0.2815, -0.2830, -0.3429, -0.3215,
        -0.0903, -0.3286, -0.2428, -0.1557, -0.1759, -0.2148, -0.3522, -0.3356,
        -0.1665, -0.2964,  0.0155, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102,
        -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102,
        -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102,
        -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102,
        -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102,
        -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102,
        -0.0102, -0.0102, -0.0102, -0.0102, -0.0102])
arg max of tensor([-0.0102, -0.3429, -0.3199, -0.3074, -0.2498, -0.2734, -0.1557, -0.1939,
        -0.2742, -0.2305, -0.2395, -

Tokens embeddings:
tensor([-0.0102, -0.3429, -0.3199, -0.3074, -0.2498, -0.2734, -0.1557, -0.1939,
        -0.2742, -0.2305, -0.2395, -0.2866, -0.2761, -0.2602, -0.3139, -0.2751,
        -0.2305, -0.3522, -0.2275, -0.3917, -0.2815, -0.2830, -0.3429, -0.3215,
        -0.0903, -0.3286, -0.2428, -0.1557, -0.1759, -0.2148, -0.3522, -0.3356,
        -0.1665, -0.2964,  0.0155, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102,
        -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102,
        -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102,
        -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102,
        -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102,
        -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102,
        -0.0102, -0.0102, -0.0102, -0.0102, -0.0102])
arg max of tensor([-0.0102, -0.3429, -0.3199, -0.3074, -0.2498, -0.2734, -0.1557, -0.1939,
        -0.2742, -0.2305, -0.2395, -

Tokens embeddings:
tensor([-1.0233e-02, -3.7405e-01, -1.0000e+09, -3.0050e-01, -3.1954e-01,
        -1.0000e+09, -1.6655e-01, -4.7442e-01, -1.0000e+09, -2.7530e-01,
        -2.0761e-01, -1.0000e+09, -2.0532e-01, -2.6632e-01, -1.0000e+09,
        -3.0787e-01, -9.9006e-02, -1.0000e+09, -1.6760e-01, -2.7339e-01,
        -1.0000e+09, -2.4218e-01, -4.4074e-01, -1.0000e+09, -3.1394e-01,
        -4.7442e-01, -1.0000e+09, -9.0318e-02, -5.1385e-01, -1.0000e+09,
        -3.1753e-01, -4.8938e-01, -1.0000e+09, -3.1688e-01, -4.7065e-01,
        -1.0000e+09, -3.1394e-01, -3.6515e-01, -1.0000e+09, -2.4115e-01,
        -4.1190e-01, -1.0000e+09, -1.5565e-01, -4.1190e-01, -1.0000e+09,
        -1.3439e-01, -4.1190e-01, -1.0000e+09, -2.7412e-01, -3.9437e-01,
        -1.0000e+09, -1.0184e-02, -3.7285e-01, -1.0000e+09, -1.0184e-02,
        -1.0184e-02, -1.0184e-02, -1.0184e-02, -1.0184e-02, -1.0184e-02,
        -1.0184e-02, -3.7285e-01, -1.0000e+09, -1.0184e-02, -3.7285e-01,
        -1.0000e+09, -1.0184e-02

Tokens embeddings:
tensor([-1.0233e-02, -3.7405e-01, -1.0000e+09, -3.0050e-01, -3.1954e-01,
        -1.0000e+09, -1.6655e-01, -4.7442e-01, -1.0000e+09, -2.7530e-01,
        -2.0761e-01, -1.0000e+09, -2.0532e-01, -2.6632e-01, -1.0000e+09,
        -3.0787e-01, -9.9006e-02, -1.0000e+09, -1.6760e-01, -2.7339e-01,
        -1.0000e+09, -2.4218e-01, -4.4074e-01, -1.0000e+09, -3.1394e-01,
        -4.7442e-01, -1.0000e+09, -9.0318e-02, -5.1385e-01, -1.0000e+09,
        -3.1753e-01, -4.8938e-01, -1.0000e+09, -3.1688e-01, -4.7065e-01,
        -1.0000e+09, -3.1394e-01, -3.6515e-01, -1.0000e+09, -2.4115e-01,
        -4.1190e-01, -1.0000e+09, -1.5565e-01, -4.1190e-01, -1.0000e+09,
        -1.3439e-01, -4.1190e-01, -1.0000e+09, -2.7412e-01, -3.9437e-01,
        -1.0000e+09, -1.0184e-02, -3.7285e-01, -1.0000e+09, -1.0184e-02,
        -1.0184e-02, -1.0184e-02, -1.0184e-02, -1.0184e-02, -1.0184e-02,
        -1.0184e-02, -3.7285e-01, -1.0000e+09, -1.0184e-02, -3.7285e-01,
        -1.0000e+09, -1.0184e-02

Tokens embeddings:
tensor([-1.0233e-02, -3.7405e-01, -1.0000e+09, -3.0050e-01, -3.1954e-01,
        -1.0000e+09, -1.6655e-01, -4.7442e-01, -1.0000e+09, -2.7530e-01,
        -2.0761e-01, -1.0000e+09, -2.0532e-01, -2.6632e-01, -1.0000e+09,
        -3.0787e-01, -9.9006e-02, -1.0000e+09, -1.6760e-01, -2.7339e-01,
        -1.0000e+09, -2.4218e-01, -4.4074e-01, -1.0000e+09, -3.1394e-01,
        -4.7442e-01, -1.0000e+09, -9.0318e-02, -5.1385e-01, -1.0000e+09,
        -3.1753e-01, -4.8938e-01, -1.0000e+09, -3.1688e-01, -4.7065e-01,
        -1.0000e+09, -3.1394e-01, -3.6515e-01, -1.0000e+09, -2.4115e-01,
        -4.1190e-01, -1.0000e+09, -1.5565e-01, -4.1190e-01, -1.0000e+09,
        -1.3439e-01, -4.1190e-01, -1.0000e+09, -2.7412e-01, -3.9437e-01,
        -1.0000e+09, -1.0184e-02, -3.7285e-01, -1.0000e+09, -1.0184e-02,
        -1.0184e-02, -1.0184e-02, -1.0184e-02, -1.0184e-02, -1.0184e-02,
        -1.0184e-02, -3.7285e-01, -1.0000e+09, -1.0184e-02, -3.7285e-01,
        -1.0000e+09, -1.0184e-02

Tokens embeddings:
tensor([-1.0233e-02, -3.7405e-01, -1.0000e+09, -3.0050e-01, -3.1954e-01,
        -1.0000e+09, -1.6655e-01, -4.7442e-01, -1.0000e+09, -2.7530e-01,
        -2.0761e-01, -1.0000e+09, -2.0532e-01, -2.6632e-01, -1.0000e+09,
        -3.0787e-01, -9.9006e-02, -1.0000e+09, -1.6760e-01, -2.7339e-01,
        -1.0000e+09, -2.4218e-01, -4.4074e-01, -1.0000e+09, -3.1394e-01,
        -4.7442e-01, -1.0000e+09, -9.0318e-02, -5.1385e-01, -1.0000e+09,
        -3.1753e-01, -4.8938e-01, -1.0000e+09, -3.1688e-01, -4.7065e-01,
        -1.0000e+09, -3.1394e-01, -3.6515e-01, -1.0000e+09, -2.4115e-01,
        -4.1190e-01, -1.0000e+09, -1.5565e-01, -4.1190e-01, -1.0000e+09,
        -1.3439e-01, -4.1190e-01, -1.0000e+09, -2.7412e-01, -3.9437e-01,
        -1.0000e+09, -1.0184e-02, -3.7285e-01, -1.0000e+09, -1.0184e-02,
        -1.0184e-02, -1.0184e-02, -1.0184e-02, -1.0184e-02, -1.0184e-02,
        -1.0184e-02, -3.7285e-01, -1.0000e+09, -1.0184e-02, -3.7285e-01,
        -1.0000e+09, -1.0184e-02

Tokens embeddings:
tensor([-1.0233e-02, -3.7405e-01, -1.0000e+09, -3.0050e-01, -3.1954e-01,
        -1.0000e+09, -1.6655e-01, -4.7442e-01, -1.0000e+09, -2.7530e-01,
        -2.0761e-01, -1.0000e+09, -2.0532e-01, -2.6632e-01, -1.0000e+09,
        -3.0787e-01, -9.9006e-02, -1.0000e+09, -1.6760e-01, -2.7339e-01,
        -1.0000e+09, -2.4218e-01, -4.4074e-01, -1.0000e+09, -3.1394e-01,
        -4.7442e-01, -1.0000e+09, -9.0318e-02, -5.1385e-01, -1.0000e+09,
        -3.1753e-01, -4.8938e-01, -1.0000e+09, -3.1688e-01, -4.7065e-01,
        -1.0000e+09, -3.1394e-01, -3.6515e-01, -1.0000e+09, -2.4115e-01,
        -4.1190e-01, -1.0000e+09, -1.5565e-01, -4.1190e-01, -1.0000e+09,
        -1.3439e-01, -4.1190e-01, -1.0000e+09, -2.7412e-01, -3.9437e-01,
        -1.0000e+09, -1.0184e-02, -3.7285e-01, -1.0000e+09, -1.0184e-02,
        -1.0184e-02, -1.0184e-02, -1.0184e-02, -1.0184e-02, -1.0184e-02,
        -1.0184e-02, -3.7285e-01, -1.0000e+09, -1.0184e-02, -3.7285e-01,
        -1.0000e+09, -1.0184e-02

Tokens embeddings:
tensor([-1.0233e-02, -3.7405e-01, -1.0000e+09, -3.0050e-01, -3.1954e-01,
        -1.0000e+09, -1.6655e-01, -4.7442e-01, -1.0000e+09, -2.7530e-01,
        -2.0761e-01, -1.0000e+09, -2.0532e-01, -2.6632e-01, -1.0000e+09,
        -3.0787e-01, -9.9006e-02, -1.0000e+09, -1.6760e-01, -2.7339e-01,
        -1.0000e+09, -2.4218e-01, -4.4074e-01, -1.0000e+09, -3.1394e-01,
        -4.7442e-01, -1.0000e+09, -9.0318e-02, -5.1385e-01, -1.0000e+09,
        -3.1753e-01, -4.8938e-01, -1.0000e+09, -3.1688e-01, -4.7065e-01,
        -1.0000e+09, -3.1394e-01, -3.6515e-01, -1.0000e+09, -2.4115e-01,
        -4.1190e-01, -1.0000e+09, -1.5565e-01, -4.1190e-01, -1.0000e+09,
        -1.3439e-01, -4.1190e-01, -1.0000e+09, -2.7412e-01, -3.9437e-01,
        -1.0000e+09, -1.0184e-02, -3.7285e-01, -1.0000e+09, -1.0184e-02,
        -1.0184e-02, -1.0184e-02, -1.0184e-02, -1.0184e-02, -1.0184e-02,
        -1.0184e-02, -3.7285e-01, -1.0000e+09, -1.0184e-02, -3.7285e-01,
        -1.0000e+09, -1.0184e-02

Tokens embeddings:
tensor([-1.0233e-02, -3.7405e-01, -1.0000e+09, -3.0050e-01, -3.1954e-01,
        -1.0000e+09, -1.6655e-01, -4.7442e-01, -1.0000e+09, -2.7530e-01,
        -2.0761e-01, -1.0000e+09, -2.0532e-01, -2.6632e-01, -1.0000e+09,
        -3.0787e-01, -9.9006e-02, -1.0000e+09, -1.6760e-01, -2.7339e-01,
        -1.0000e+09, -2.4218e-01, -4.4074e-01, -1.0000e+09, -3.1394e-01,
        -4.7442e-01, -1.0000e+09, -9.0318e-02, -5.1385e-01, -1.0000e+09,
        -3.1753e-01, -4.8938e-01, -1.0000e+09, -3.1688e-01, -4.7065e-01,
        -1.0000e+09, -3.1394e-01, -3.6515e-01, -1.0000e+09, -2.4115e-01,
        -4.1190e-01, -1.0000e+09, -1.5565e-01, -4.1190e-01, -1.0000e+09,
        -1.3439e-01, -4.1190e-01, -1.0000e+09, -2.7412e-01, -3.9437e-01,
        -1.0000e+09, -1.0184e-02, -3.7285e-01, -1.0000e+09, -1.0184e-02,
        -1.0184e-02, -1.0184e-02, -1.0184e-02, -1.0184e-02, -1.0184e-02,
        -1.0184e-02, -3.7285e-01, -1.0000e+09, -1.0184e-02, -3.7285e-01,
        -1.0000e+09, -1.0184e-02

Tokens embeddings:
tensor([-1.0233e-02, -3.7405e-01, -1.0000e+09, -3.0050e-01, -3.1954e-01,
        -1.0000e+09, -1.6655e-01, -4.7442e-01, -1.0000e+09, -2.7530e-01,
        -2.0761e-01, -1.0000e+09, -2.0532e-01, -2.6632e-01, -1.0000e+09,
        -3.0787e-01, -9.9006e-02, -1.0000e+09, -1.6760e-01, -2.7339e-01,
        -1.0000e+09, -2.4218e-01, -4.4074e-01, -1.0000e+09, -3.1394e-01,
        -4.7442e-01, -1.0000e+09, -9.0318e-02, -5.1385e-01, -1.0000e+09,
        -3.1753e-01, -4.8938e-01, -1.0000e+09, -3.1688e-01, -4.7065e-01,
        -1.0000e+09, -3.1394e-01, -3.6515e-01, -1.0000e+09, -2.4115e-01,
        -4.1190e-01, -1.0000e+09, -1.5565e-01, -4.1190e-01, -1.0000e+09,
        -1.3439e-01, -4.1190e-01, -1.0000e+09, -2.7412e-01, -3.9437e-01,
        -1.0000e+09, -1.0184e-02, -3.7285e-01, -1.0000e+09, -1.0184e-02,
        -1.0184e-02, -1.0184e-02, -1.0184e-02, -1.0184e-02, -1.0184e-02,
        -1.0184e-02, -3.7285e-01, -1.0000e+09, -1.0184e-02, -3.7285e-01,
        -1.0000e+09, -1.0184e-02

Tokens embeddings:
tensor([-1.0233e-02, -3.7405e-01, -1.0000e+09, -3.0050e-01, -3.1954e-01,
        -1.0000e+09, -1.6655e-01, -4.7442e-01, -1.0000e+09, -2.7530e-01,
        -2.0761e-01, -1.0000e+09, -2.0532e-01, -2.6632e-01, -1.0000e+09,
        -3.0787e-01, -9.9006e-02, -1.0000e+09, -1.6760e-01, -2.7339e-01,
        -1.0000e+09, -2.4218e-01, -4.4074e-01, -1.0000e+09, -3.1394e-01,
        -4.7442e-01, -1.0000e+09, -9.0318e-02, -5.1385e-01, -1.0000e+09,
        -3.1753e-01, -4.8938e-01, -1.0000e+09, -3.1688e-01, -4.7065e-01,
        -1.0000e+09, -3.1394e-01, -3.6515e-01, -1.0000e+09, -2.4115e-01,
        -4.1190e-01, -1.0000e+09, -1.5565e-01, -4.1190e-01, -1.0000e+09,
        -1.3439e-01, -4.1190e-01, -1.0000e+09, -2.7412e-01, -3.9437e-01,
        -1.0000e+09, -1.0184e-02, -3.7285e-01, -1.0000e+09, -1.0184e-02,
        -1.0184e-02, -1.0184e-02, -1.0184e-02, -1.0184e-02, -1.0184e-02,
        -1.0184e-02, -3.7285e-01, -1.0000e+09, -1.0184e-02, -3.7285e-01,
        -1.0000e+09, -1.0184e-02

Tokens embeddings:
tensor([-1.0233e-02, -3.7405e-01, -1.0000e+09, -3.0050e-01, -3.1954e-01,
        -1.0000e+09, -1.6655e-01, -4.7442e-01, -1.0000e+09, -2.7530e-01,
        -2.0761e-01, -1.0000e+09, -2.0532e-01, -2.6632e-01, -1.0000e+09,
        -3.0787e-01, -9.9006e-02, -1.0000e+09, -1.6760e-01, -2.7339e-01,
        -1.0000e+09, -2.4218e-01, -4.4074e-01, -1.0000e+09, -3.1394e-01,
        -4.7442e-01, -1.0000e+09, -9.0318e-02, -5.1385e-01, -1.0000e+09,
        -3.1753e-01, -4.8938e-01, -1.0000e+09, -3.1688e-01, -4.7065e-01,
        -1.0000e+09, -3.1394e-01, -3.6515e-01, -1.0000e+09, -2.4115e-01,
        -4.1190e-01, -1.0000e+09, -1.5565e-01, -4.1190e-01, -1.0000e+09,
        -1.3439e-01, -4.1190e-01, -1.0000e+09, -2.7412e-01, -3.9437e-01,
        -1.0000e+09, -1.0184e-02, -3.7285e-01, -1.0000e+09, -1.0184e-02,
        -1.0184e-02, -1.0184e-02, -1.0184e-02, -1.0184e-02, -1.0184e-02,
        -1.0184e-02, -3.7285e-01, -1.0000e+09, -1.0184e-02, -3.7285e-01,
        -1.0000e+09, -1.0184e-02

Tokens embeddings:
tensor([-1.0233e-02, -3.7405e-01, -1.0000e+09, -3.0050e-01, -3.1954e-01,
        -1.0000e+09, -1.6655e-01, -4.7442e-01, -1.0000e+09, -2.7530e-01,
        -2.0761e-01, -1.0000e+09, -2.0532e-01, -2.6632e-01, -1.0000e+09,
        -3.0787e-01, -9.9006e-02, -1.0000e+09, -1.6760e-01, -2.7339e-01,
        -1.0000e+09, -2.4218e-01, -4.4074e-01, -1.0000e+09, -3.1394e-01,
        -4.7442e-01, -1.0000e+09, -9.0318e-02, -5.1385e-01, -1.0000e+09,
        -3.1753e-01, -4.8938e-01, -1.0000e+09, -3.1688e-01, -4.7065e-01,
        -1.0000e+09, -3.1394e-01, -3.6515e-01, -1.0000e+09, -2.4115e-01,
        -4.1190e-01, -1.0000e+09, -1.5565e-01, -4.1190e-01, -1.0000e+09,
        -1.3439e-01, -4.1190e-01, -1.0000e+09, -2.7412e-01, -3.9437e-01,
        -1.0000e+09, -1.0184e-02, -3.7285e-01, -1.0000e+09, -1.0184e-02,
        -1.0184e-02, -1.0184e-02, -1.0184e-02, -1.0184e-02, -1.0184e-02,
        -1.0184e-02, -3.7285e-01, -1.0000e+09, -1.0184e-02, -3.7285e-01,
        -1.0000e+09, -1.0184e-02

Tokens embeddings:
tensor([-1.0233e-02, -3.7405e-01, -1.0000e+09, -3.0050e-01, -3.1954e-01,
        -1.0000e+09, -1.6655e-01, -4.7442e-01, -1.0000e+09, -2.7530e-01,
        -2.0761e-01, -1.0000e+09, -2.0532e-01, -2.6632e-01, -1.0000e+09,
        -3.0787e-01, -9.9006e-02, -1.0000e+09, -1.6760e-01, -2.7339e-01,
        -1.0000e+09, -2.4218e-01, -4.4074e-01, -1.0000e+09, -3.1394e-01,
        -4.7442e-01, -1.0000e+09, -9.0318e-02, -5.1385e-01, -1.0000e+09,
        -3.1753e-01, -4.8938e-01, -1.0000e+09, -3.1688e-01, -4.7065e-01,
        -1.0000e+09, -3.1394e-01, -3.6515e-01, -1.0000e+09, -2.4115e-01,
        -4.1190e-01, -1.0000e+09, -1.5565e-01, -4.1190e-01, -1.0000e+09,
        -1.3439e-01, -4.1190e-01, -1.0000e+09, -2.7412e-01, -3.9437e-01,
        -1.0000e+09, -1.0184e-02, -3.7285e-01, -1.0000e+09, -1.0184e-02,
        -1.0184e-02, -1.0184e-02, -1.0184e-02, -1.0184e-02, -1.0184e-02,
        -1.0184e-02, -3.7285e-01, -1.0000e+09, -1.0184e-02, -3.7285e-01,
        -1.0000e+09, -1.0184e-02

In [None]:
# Pooling First token [CLS] for each sentence - argmax

# Select some sentences randomly
sent_index = [11, 12, 13, 14, 15, 16, 17, 18, 19, 20]

for s in sent_index:
  print("*" * 100)
  # Get the sentence's words
  tokens = test_inputs[s]
  # For each layer...
  for l in range(12):
    print("\nLayer", l+1)
    attention = np.squeeze(test_attentions[s][l].detach().cpu().numpy(), axis=0)
    # and for each head
    #for h, head in enumerate(attention):
    #print("Head", h+1)
    # Get the attention for the cls token
    encoded_tokens = bert_tokenizer(tokens, padding=True, truncation=True, max_length=128, return_tensors='pt')
    encoded_tokens = encoded_tokens.to(device)
    with torch.no_grad():
      model_output1 = model_e(**encoded_tokens)
      tokens_embeddings = model_output1[0][:,0]
      tokens_embeddings = tokens_embeddings.cpu()
      cls_attentions = head[0]
      display(HTML(colorize(tokens, cls_attentions)))
      print("Tokens embeddings:")
      print(tokens_embeddings)
      arg = argmax(tokens_embeddings)
      print('arg max of %s: %d' % (tokens_embeddings, arg))

****************************************************************************************************

Layer 1


Tokens embeddings:
tensor([-0.3412, -0.3741, -0.3037, -0.2723, -0.4744, -0.3792, -0.3842, -0.4224,
        -0.5023, -0.2076, -0.4744, -0.3957, -0.4119, -0.4338, -0.4879, -0.4043,
        -0.4261, -0.4468, -0.4744, -0.1959, -0.4119, -0.4338, -0.3072, -0.1870,
        -0.4462, -0.4009, -0.3814, -0.4744, -0.1356, -0.2218, -0.4744, -0.4416,
        -0.2950, -0.4054, -0.3218, -0.4119, -0.3013, -0.2779, -0.2326, -0.1740,
        -0.3682, -0.3676, -0.3366, -0.3311, -0.4309, -0.4224, -0.5198, -0.4457,
        -0.4385, -0.3412, -0.2329, -0.2812, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729])
arg max of tensor([-0.3412, -0.3741, -0.3037, -0.2723, -0.4744, -0.3792, -0.3842, -0.4224,
        -0.5023, -0.2076, -0.4744, -

Tokens embeddings:
tensor([-0.3412, -0.3741, -0.3037, -0.2723, -0.4744, -0.3792, -0.3842, -0.4224,
        -0.5023, -0.2076, -0.4744, -0.3957, -0.4119, -0.4338, -0.4879, -0.4043,
        -0.4261, -0.4468, -0.4744, -0.1959, -0.4119, -0.4338, -0.3072, -0.1870,
        -0.4462, -0.4009, -0.3814, -0.4744, -0.1356, -0.2218, -0.4744, -0.4416,
        -0.2950, -0.4054, -0.3218, -0.4119, -0.3013, -0.2779, -0.2326, -0.1740,
        -0.3682, -0.3676, -0.3366, -0.3311, -0.4309, -0.4224, -0.5198, -0.4457,
        -0.4385, -0.3412, -0.2329, -0.2812, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729])
arg max of tensor([-0.3412, -0.3741, -0.3037, -0.2723, -0.4744, -0.3792, -0.3842, -0.4224,
        -0.5023, -0.2076, -0.4744, -

Tokens embeddings:
tensor([-0.3412, -0.3741, -0.3037, -0.2723, -0.4744, -0.3792, -0.3842, -0.4224,
        -0.5023, -0.2076, -0.4744, -0.3957, -0.4119, -0.4338, -0.4879, -0.4043,
        -0.4261, -0.4468, -0.4744, -0.1959, -0.4119, -0.4338, -0.3072, -0.1870,
        -0.4462, -0.4009, -0.3814, -0.4744, -0.1356, -0.2218, -0.4744, -0.4416,
        -0.2950, -0.4054, -0.3218, -0.4119, -0.3013, -0.2779, -0.2326, -0.1740,
        -0.3682, -0.3676, -0.3366, -0.3311, -0.4309, -0.4224, -0.5198, -0.4457,
        -0.4385, -0.3412, -0.2329, -0.2812, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729])
arg max of tensor([-0.3412, -0.3741, -0.3037, -0.2723, -0.4744, -0.3792, -0.3842, -0.4224,
        -0.5023, -0.2076, -0.4744, -

Tokens embeddings:
tensor([-0.3412, -0.3741, -0.3037, -0.2723, -0.4744, -0.3792, -0.3842, -0.4224,
        -0.5023, -0.2076, -0.4744, -0.3957, -0.4119, -0.4338, -0.4879, -0.4043,
        -0.4261, -0.4468, -0.4744, -0.1959, -0.4119, -0.4338, -0.3072, -0.1870,
        -0.4462, -0.4009, -0.3814, -0.4744, -0.1356, -0.2218, -0.4744, -0.4416,
        -0.2950, -0.4054, -0.3218, -0.4119, -0.3013, -0.2779, -0.2326, -0.1740,
        -0.3682, -0.3676, -0.3366, -0.3311, -0.4309, -0.4224, -0.5198, -0.4457,
        -0.4385, -0.3412, -0.2329, -0.2812, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729])
arg max of tensor([-0.3412, -0.3741, -0.3037, -0.2723, -0.4744, -0.3792, -0.3842, -0.4224,
        -0.5023, -0.2076, -0.4744, -

Tokens embeddings:
tensor([-0.3412, -0.3741, -0.3037, -0.2723, -0.4744, -0.3792, -0.3842, -0.4224,
        -0.5023, -0.2076, -0.4744, -0.3957, -0.4119, -0.4338, -0.4879, -0.4043,
        -0.4261, -0.4468, -0.4744, -0.1959, -0.4119, -0.4338, -0.3072, -0.1870,
        -0.4462, -0.4009, -0.3814, -0.4744, -0.1356, -0.2218, -0.4744, -0.4416,
        -0.2950, -0.4054, -0.3218, -0.4119, -0.3013, -0.2779, -0.2326, -0.1740,
        -0.3682, -0.3676, -0.3366, -0.3311, -0.4309, -0.4224, -0.5198, -0.4457,
        -0.4385, -0.3412, -0.2329, -0.2812, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729])
arg max of tensor([-0.3412, -0.3741, -0.3037, -0.2723, -0.4744, -0.3792, -0.3842, -0.4224,
        -0.5023, -0.2076, -0.4744, -

Tokens embeddings:
tensor([-0.3412, -0.3741, -0.3037, -0.2723, -0.4744, -0.3792, -0.3842, -0.4224,
        -0.5023, -0.2076, -0.4744, -0.3957, -0.4119, -0.4338, -0.4879, -0.4043,
        -0.4261, -0.4468, -0.4744, -0.1959, -0.4119, -0.4338, -0.3072, -0.1870,
        -0.4462, -0.4009, -0.3814, -0.4744, -0.1356, -0.2218, -0.4744, -0.4416,
        -0.2950, -0.4054, -0.3218, -0.4119, -0.3013, -0.2779, -0.2326, -0.1740,
        -0.3682, -0.3676, -0.3366, -0.3311, -0.4309, -0.4224, -0.5198, -0.4457,
        -0.4385, -0.3412, -0.2329, -0.2812, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729])
arg max of tensor([-0.3412, -0.3741, -0.3037, -0.2723, -0.4744, -0.3792, -0.3842, -0.4224,
        -0.5023, -0.2076, -0.4744, -

Tokens embeddings:
tensor([-0.3412, -0.3741, -0.3037, -0.2723, -0.4744, -0.3792, -0.3842, -0.4224,
        -0.5023, -0.2076, -0.4744, -0.3957, -0.4119, -0.4338, -0.4879, -0.4043,
        -0.4261, -0.4468, -0.4744, -0.1959, -0.4119, -0.4338, -0.3072, -0.1870,
        -0.4462, -0.4009, -0.3814, -0.4744, -0.1356, -0.2218, -0.4744, -0.4416,
        -0.2950, -0.4054, -0.3218, -0.4119, -0.3013, -0.2779, -0.2326, -0.1740,
        -0.3682, -0.3676, -0.3366, -0.3311, -0.4309, -0.4224, -0.5198, -0.4457,
        -0.4385, -0.3412, -0.2329, -0.2812, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729])
arg max of tensor([-0.3412, -0.3741, -0.3037, -0.2723, -0.4744, -0.3792, -0.3842, -0.4224,
        -0.5023, -0.2076, -0.4744, -

Tokens embeddings:
tensor([-0.3412, -0.3741, -0.3037, -0.2723, -0.4744, -0.3792, -0.3842, -0.4224,
        -0.5023, -0.2076, -0.4744, -0.3957, -0.4119, -0.4338, -0.4879, -0.4043,
        -0.4261, -0.4468, -0.4744, -0.1959, -0.4119, -0.4338, -0.3072, -0.1870,
        -0.4462, -0.4009, -0.3814, -0.4744, -0.1356, -0.2218, -0.4744, -0.4416,
        -0.2950, -0.4054, -0.3218, -0.4119, -0.3013, -0.2779, -0.2326, -0.1740,
        -0.3682, -0.3676, -0.3366, -0.3311, -0.4309, -0.4224, -0.5198, -0.4457,
        -0.4385, -0.3412, -0.2329, -0.2812, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729])
arg max of tensor([-0.3412, -0.3741, -0.3037, -0.2723, -0.4744, -0.3792, -0.3842, -0.4224,
        -0.5023, -0.2076, -0.4744, -

Tokens embeddings:
tensor([-0.3412, -0.3741, -0.3037, -0.2723, -0.4744, -0.3792, -0.3842, -0.4224,
        -0.5023, -0.2076, -0.4744, -0.3957, -0.4119, -0.4338, -0.4879, -0.4043,
        -0.4261, -0.4468, -0.4744, -0.1959, -0.4119, -0.4338, -0.3072, -0.1870,
        -0.4462, -0.4009, -0.3814, -0.4744, -0.1356, -0.2218, -0.4744, -0.4416,
        -0.2950, -0.4054, -0.3218, -0.4119, -0.3013, -0.2779, -0.2326, -0.1740,
        -0.3682, -0.3676, -0.3366, -0.3311, -0.4309, -0.4224, -0.5198, -0.4457,
        -0.4385, -0.3412, -0.2329, -0.2812, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729])
arg max of tensor([-0.3412, -0.3741, -0.3037, -0.2723, -0.4744, -0.3792, -0.3842, -0.4224,
        -0.5023, -0.2076, -0.4744, -

Tokens embeddings:
tensor([-0.3412, -0.3741, -0.3037, -0.2723, -0.4744, -0.3792, -0.3842, -0.4224,
        -0.5023, -0.2076, -0.4744, -0.3957, -0.4119, -0.4338, -0.4879, -0.4043,
        -0.4261, -0.4468, -0.4744, -0.1959, -0.4119, -0.4338, -0.3072, -0.1870,
        -0.4462, -0.4009, -0.3814, -0.4744, -0.1356, -0.2218, -0.4744, -0.4416,
        -0.2950, -0.4054, -0.3218, -0.4119, -0.3013, -0.2779, -0.2326, -0.1740,
        -0.3682, -0.3676, -0.3366, -0.3311, -0.4309, -0.4224, -0.5198, -0.4457,
        -0.4385, -0.3412, -0.2329, -0.2812, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729])
arg max of tensor([-0.3412, -0.3741, -0.3037, -0.2723, -0.4744, -0.3792, -0.3842, -0.4224,
        -0.5023, -0.2076, -0.4744, -

Tokens embeddings:
tensor([-0.3412, -0.3741, -0.3037, -0.2723, -0.4744, -0.3792, -0.3842, -0.4224,
        -0.5023, -0.2076, -0.4744, -0.3957, -0.4119, -0.4338, -0.4879, -0.4043,
        -0.4261, -0.4468, -0.4744, -0.1959, -0.4119, -0.4338, -0.3072, -0.1870,
        -0.4462, -0.4009, -0.3814, -0.4744, -0.1356, -0.2218, -0.4744, -0.4416,
        -0.2950, -0.4054, -0.3218, -0.4119, -0.3013, -0.2779, -0.2326, -0.1740,
        -0.3682, -0.3676, -0.3366, -0.3311, -0.4309, -0.4224, -0.5198, -0.4457,
        -0.4385, -0.3412, -0.2329, -0.2812, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729])
arg max of tensor([-0.3412, -0.3741, -0.3037, -0.2723, -0.4744, -0.3792, -0.3842, -0.4224,
        -0.5023, -0.2076, -0.4744, -

Tokens embeddings:
tensor([-0.3412, -0.3741, -0.3037, -0.2723, -0.4744, -0.3792, -0.3842, -0.4224,
        -0.5023, -0.2076, -0.4744, -0.3957, -0.4119, -0.4338, -0.4879, -0.4043,
        -0.4261, -0.4468, -0.4744, -0.1959, -0.4119, -0.4338, -0.3072, -0.1870,
        -0.4462, -0.4009, -0.3814, -0.4744, -0.1356, -0.2218, -0.4744, -0.4416,
        -0.2950, -0.4054, -0.3218, -0.4119, -0.3013, -0.2779, -0.2326, -0.1740,
        -0.3682, -0.3676, -0.3366, -0.3311, -0.4309, -0.4224, -0.5198, -0.4457,
        -0.4385, -0.3412, -0.2329, -0.2812, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729])
arg max of tensor([-0.3412, -0.3741, -0.3037, -0.2723, -0.4744, -0.3792, -0.3842, -0.4224,
        -0.5023, -0.2076, -0.4744, -

Tokens embeddings:
tensor([-0.3412, -0.3791, -0.0990, -0.4486, -0.4091, -0.4744, -0.3529, -0.1853,
        -0.2910, -0.4044, -0.3342, -0.4224, -0.4359, -0.2723, -0.4881, -0.3806,
        -0.1995, -0.2723, -0.3312, -0.4373, -0.3741, -0.4744, -0.4585, -0.3842,
        -0.4195, -0.2921, -0.2910, -0.3529, -0.4119, -0.1516, -0.1870, -0.4740,
        -0.3032, -0.1848, -0.4009, -0.2812, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729])
arg max of tensor([-0.3412, -0.3791, -0.0990, -0.4486, -0.4091, -0.4744, -0.3529, -0.1853,
        -0.2910, -0.4044, -0.3342, -

Tokens embeddings:
tensor([-0.3412, -0.3791, -0.0990, -0.4486, -0.4091, -0.4744, -0.3529, -0.1853,
        -0.2910, -0.4044, -0.3342, -0.4224, -0.4359, -0.2723, -0.4881, -0.3806,
        -0.1995, -0.2723, -0.3312, -0.4373, -0.3741, -0.4744, -0.4585, -0.3842,
        -0.4195, -0.2921, -0.2910, -0.3529, -0.4119, -0.1516, -0.1870, -0.4740,
        -0.3032, -0.1848, -0.4009, -0.2812, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729])
arg max of tensor([-0.3412, -0.3791, -0.0990, -0.4486, -0.4091, -0.4744, -0.3529, -0.1853,
        -0.2910, -0.4044, -0.3342, -

Tokens embeddings:
tensor([-0.3412, -0.3791, -0.0990, -0.4486, -0.4091, -0.4744, -0.3529, -0.1853,
        -0.2910, -0.4044, -0.3342, -0.4224, -0.4359, -0.2723, -0.4881, -0.3806,
        -0.1995, -0.2723, -0.3312, -0.4373, -0.3741, -0.4744, -0.4585, -0.3842,
        -0.4195, -0.2921, -0.2910, -0.3529, -0.4119, -0.1516, -0.1870, -0.4740,
        -0.3032, -0.1848, -0.4009, -0.2812, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729])
arg max of tensor([-0.3412, -0.3791, -0.0990, -0.4486, -0.4091, -0.4744, -0.3529, -0.1853,
        -0.2910, -0.4044, -0.3342, -

Tokens embeddings:
tensor([-0.3412, -0.3791, -0.0990, -0.4486, -0.4091, -0.4744, -0.3529, -0.1853,
        -0.2910, -0.4044, -0.3342, -0.4224, -0.4359, -0.2723, -0.4881, -0.3806,
        -0.1995, -0.2723, -0.3312, -0.4373, -0.3741, -0.4744, -0.4585, -0.3842,
        -0.4195, -0.2921, -0.2910, -0.3529, -0.4119, -0.1516, -0.1870, -0.4740,
        -0.3032, -0.1848, -0.4009, -0.2812, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729])
arg max of tensor([-0.3412, -0.3791, -0.0990, -0.4486, -0.4091, -0.4744, -0.3529, -0.1853,
        -0.2910, -0.4044, -0.3342, -

Tokens embeddings:
tensor([-0.3412, -0.3791, -0.0990, -0.4486, -0.4091, -0.4744, -0.3529, -0.1853,
        -0.2910, -0.4044, -0.3342, -0.4224, -0.4359, -0.2723, -0.4881, -0.3806,
        -0.1995, -0.2723, -0.3312, -0.4373, -0.3741, -0.4744, -0.4585, -0.3842,
        -0.4195, -0.2921, -0.2910, -0.3529, -0.4119, -0.1516, -0.1870, -0.4740,
        -0.3032, -0.1848, -0.4009, -0.2812, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729])
arg max of tensor([-0.3412, -0.3791, -0.0990, -0.4486, -0.4091, -0.4744, -0.3529, -0.1853,
        -0.2910, -0.4044, -0.3342, -

Tokens embeddings:
tensor([-0.3412, -0.3791, -0.0990, -0.4486, -0.4091, -0.4744, -0.3529, -0.1853,
        -0.2910, -0.4044, -0.3342, -0.4224, -0.4359, -0.2723, -0.4881, -0.3806,
        -0.1995, -0.2723, -0.3312, -0.4373, -0.3741, -0.4744, -0.4585, -0.3842,
        -0.4195, -0.2921, -0.2910, -0.3529, -0.4119, -0.1516, -0.1870, -0.4740,
        -0.3032, -0.1848, -0.4009, -0.2812, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729])
arg max of tensor([-0.3412, -0.3791, -0.0990, -0.4486, -0.4091, -0.4744, -0.3529, -0.1853,
        -0.2910, -0.4044, -0.3342, -

Tokens embeddings:
tensor([-0.3412, -0.3791, -0.0990, -0.4486, -0.4091, -0.4744, -0.3529, -0.1853,
        -0.2910, -0.4044, -0.3342, -0.4224, -0.4359, -0.2723, -0.4881, -0.3806,
        -0.1995, -0.2723, -0.3312, -0.4373, -0.3741, -0.4744, -0.4585, -0.3842,
        -0.4195, -0.2921, -0.2910, -0.3529, -0.4119, -0.1516, -0.1870, -0.4740,
        -0.3032, -0.1848, -0.4009, -0.2812, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729])
arg max of tensor([-0.3412, -0.3791, -0.0990, -0.4486, -0.4091, -0.4744, -0.3529, -0.1853,
        -0.2910, -0.4044, -0.3342, -

Tokens embeddings:
tensor([-0.3412, -0.3791, -0.0990, -0.4486, -0.4091, -0.4744, -0.3529, -0.1853,
        -0.2910, -0.4044, -0.3342, -0.4224, -0.4359, -0.2723, -0.4881, -0.3806,
        -0.1995, -0.2723, -0.3312, -0.4373, -0.3741, -0.4744, -0.4585, -0.3842,
        -0.4195, -0.2921, -0.2910, -0.3529, -0.4119, -0.1516, -0.1870, -0.4740,
        -0.3032, -0.1848, -0.4009, -0.2812, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729])
arg max of tensor([-0.3412, -0.3791, -0.0990, -0.4486, -0.4091, -0.4744, -0.3529, -0.1853,
        -0.2910, -0.4044, -0.3342, -

Tokens embeddings:
tensor([-0.3412, -0.3791, -0.0990, -0.4486, -0.4091, -0.4744, -0.3529, -0.1853,
        -0.2910, -0.4044, -0.3342, -0.4224, -0.4359, -0.2723, -0.4881, -0.3806,
        -0.1995, -0.2723, -0.3312, -0.4373, -0.3741, -0.4744, -0.4585, -0.3842,
        -0.4195, -0.2921, -0.2910, -0.3529, -0.4119, -0.1516, -0.1870, -0.4740,
        -0.3032, -0.1848, -0.4009, -0.2812, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729])
arg max of tensor([-0.3412, -0.3791, -0.0990, -0.4486, -0.4091, -0.4744, -0.3529, -0.1853,
        -0.2910, -0.4044, -0.3342, -

Tokens embeddings:
tensor([-0.3412, -0.3791, -0.0990, -0.4486, -0.4091, -0.4744, -0.3529, -0.1853,
        -0.2910, -0.4044, -0.3342, -0.4224, -0.4359, -0.2723, -0.4881, -0.3806,
        -0.1995, -0.2723, -0.3312, -0.4373, -0.3741, -0.4744, -0.4585, -0.3842,
        -0.4195, -0.2921, -0.2910, -0.3529, -0.4119, -0.1516, -0.1870, -0.4740,
        -0.3032, -0.1848, -0.4009, -0.2812, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729])
arg max of tensor([-0.3412, -0.3791, -0.0990, -0.4486, -0.4091, -0.4744, -0.3529, -0.1853,
        -0.2910, -0.4044, -0.3342, -

Tokens embeddings:
tensor([-0.3412, -0.3791, -0.0990, -0.4486, -0.4091, -0.4744, -0.3529, -0.1853,
        -0.2910, -0.4044, -0.3342, -0.4224, -0.4359, -0.2723, -0.4881, -0.3806,
        -0.1995, -0.2723, -0.3312, -0.4373, -0.3741, -0.4744, -0.4585, -0.3842,
        -0.4195, -0.2921, -0.2910, -0.3529, -0.4119, -0.1516, -0.1870, -0.4740,
        -0.3032, -0.1848, -0.4009, -0.2812, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729])
arg max of tensor([-0.3412, -0.3791, -0.0990, -0.4486, -0.4091, -0.4744, -0.3529, -0.1853,
        -0.2910, -0.4044, -0.3342, -

Tokens embeddings:
tensor([-0.3412, -0.3791, -0.0990, -0.4486, -0.4091, -0.4744, -0.3529, -0.1853,
        -0.2910, -0.4044, -0.3342, -0.4224, -0.4359, -0.2723, -0.4881, -0.3806,
        -0.1995, -0.2723, -0.3312, -0.4373, -0.3741, -0.4744, -0.4585, -0.3842,
        -0.4195, -0.2921, -0.2910, -0.3529, -0.4119, -0.1516, -0.1870, -0.4740,
        -0.3032, -0.1848, -0.4009, -0.2812, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729])
arg max of tensor([-0.3412, -0.3791, -0.0990, -0.4486, -0.4091, -0.4744, -0.3529, -0.1853,
        -0.2910, -0.4044, -0.3342, -

Tokens embeddings:
tensor([-0.3412, -0.4838, -0.3748, -0.3576, -0.5139, -0.4236, -0.4243, -0.3922,
        -0.2811, -0.2076, -0.4316, -0.3342, -0.4472, -0.5139, -0.4054, -0.2218,
        -0.2978, -0.3339, -0.3218, -0.4119, -0.3013, -0.4312, -0.5182, -0.4040,
        -0.3741, -0.4744, -0.2861, -0.1870, -0.3056, -0.3922, -0.3704, -0.2723,
        -0.3998, -0.4744, -0.4773, -0.2812, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729])
arg max of tensor([-0.3412, -0.4838, -0.3748, -0.3576, -0.5139, -0.4236, -0.4243, -0.3922,
        -0.2811, -0.2076, -0.4316, -

Tokens embeddings:
tensor([-0.3412, -0.4838, -0.3748, -0.3576, -0.5139, -0.4236, -0.4243, -0.3922,
        -0.2811, -0.2076, -0.4316, -0.3342, -0.4472, -0.5139, -0.4054, -0.2218,
        -0.2978, -0.3339, -0.3218, -0.4119, -0.3013, -0.4312, -0.5182, -0.4040,
        -0.3741, -0.4744, -0.2861, -0.1870, -0.3056, -0.3922, -0.3704, -0.2723,
        -0.3998, -0.4744, -0.4773, -0.2812, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729])
arg max of tensor([-0.3412, -0.4838, -0.3748, -0.3576, -0.5139, -0.4236, -0.4243, -0.3922,
        -0.2811, -0.2076, -0.4316, -

Tokens embeddings:
tensor([-0.3412, -0.4838, -0.3748, -0.3576, -0.5139, -0.4236, -0.4243, -0.3922,
        -0.2811, -0.2076, -0.4316, -0.3342, -0.4472, -0.5139, -0.4054, -0.2218,
        -0.2978, -0.3339, -0.3218, -0.4119, -0.3013, -0.4312, -0.5182, -0.4040,
        -0.3741, -0.4744, -0.2861, -0.1870, -0.3056, -0.3922, -0.3704, -0.2723,
        -0.3998, -0.4744, -0.4773, -0.2812, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729])
arg max of tensor([-0.3412, -0.4838, -0.3748, -0.3576, -0.5139, -0.4236, -0.4243, -0.3922,
        -0.2811, -0.2076, -0.4316, -

Tokens embeddings:
tensor([-0.3412, -0.4838, -0.3748, -0.3576, -0.5139, -0.4236, -0.4243, -0.3922,
        -0.2811, -0.2076, -0.4316, -0.3342, -0.4472, -0.5139, -0.4054, -0.2218,
        -0.2978, -0.3339, -0.3218, -0.4119, -0.3013, -0.4312, -0.5182, -0.4040,
        -0.3741, -0.4744, -0.2861, -0.1870, -0.3056, -0.3922, -0.3704, -0.2723,
        -0.3998, -0.4744, -0.4773, -0.2812, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729])
arg max of tensor([-0.3412, -0.4838, -0.3748, -0.3576, -0.5139, -0.4236, -0.4243, -0.3922,
        -0.2811, -0.2076, -0.4316, -

Tokens embeddings:
tensor([-0.3412, -0.4838, -0.3748, -0.3576, -0.5139, -0.4236, -0.4243, -0.3922,
        -0.2811, -0.2076, -0.4316, -0.3342, -0.4472, -0.5139, -0.4054, -0.2218,
        -0.2978, -0.3339, -0.3218, -0.4119, -0.3013, -0.4312, -0.5182, -0.4040,
        -0.3741, -0.4744, -0.2861, -0.1870, -0.3056, -0.3922, -0.3704, -0.2723,
        -0.3998, -0.4744, -0.4773, -0.2812, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729])
arg max of tensor([-0.3412, -0.4838, -0.3748, -0.3576, -0.5139, -0.4236, -0.4243, -0.3922,
        -0.2811, -0.2076, -0.4316, -

Tokens embeddings:
tensor([-0.3412, -0.4838, -0.3748, -0.3576, -0.5139, -0.4236, -0.4243, -0.3922,
        -0.2811, -0.2076, -0.4316, -0.3342, -0.4472, -0.5139, -0.4054, -0.2218,
        -0.2978, -0.3339, -0.3218, -0.4119, -0.3013, -0.4312, -0.5182, -0.4040,
        -0.3741, -0.4744, -0.2861, -0.1870, -0.3056, -0.3922, -0.3704, -0.2723,
        -0.3998, -0.4744, -0.4773, -0.2812, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729])
arg max of tensor([-0.3412, -0.4838, -0.3748, -0.3576, -0.5139, -0.4236, -0.4243, -0.3922,
        -0.2811, -0.2076, -0.4316, -

Tokens embeddings:
tensor([-0.3412, -0.4838, -0.3748, -0.3576, -0.5139, -0.4236, -0.4243, -0.3922,
        -0.2811, -0.2076, -0.4316, -0.3342, -0.4472, -0.5139, -0.4054, -0.2218,
        -0.2978, -0.3339, -0.3218, -0.4119, -0.3013, -0.4312, -0.5182, -0.4040,
        -0.3741, -0.4744, -0.2861, -0.1870, -0.3056, -0.3922, -0.3704, -0.2723,
        -0.3998, -0.4744, -0.4773, -0.2812, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729])
arg max of tensor([-0.3412, -0.4838, -0.3748, -0.3576, -0.5139, -0.4236, -0.4243, -0.3922,
        -0.2811, -0.2076, -0.4316, -

Tokens embeddings:
tensor([-0.3412, -0.4838, -0.3748, -0.3576, -0.5139, -0.4236, -0.4243, -0.3922,
        -0.2811, -0.2076, -0.4316, -0.3342, -0.4472, -0.5139, -0.4054, -0.2218,
        -0.2978, -0.3339, -0.3218, -0.4119, -0.3013, -0.4312, -0.5182, -0.4040,
        -0.3741, -0.4744, -0.2861, -0.1870, -0.3056, -0.3922, -0.3704, -0.2723,
        -0.3998, -0.4744, -0.4773, -0.2812, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729])
arg max of tensor([-0.3412, -0.4838, -0.3748, -0.3576, -0.5139, -0.4236, -0.4243, -0.3922,
        -0.2811, -0.2076, -0.4316, -

Tokens embeddings:
tensor([-0.3412, -0.4838, -0.3748, -0.3576, -0.5139, -0.4236, -0.4243, -0.3922,
        -0.2811, -0.2076, -0.4316, -0.3342, -0.4472, -0.5139, -0.4054, -0.2218,
        -0.2978, -0.3339, -0.3218, -0.4119, -0.3013, -0.4312, -0.5182, -0.4040,
        -0.3741, -0.4744, -0.2861, -0.1870, -0.3056, -0.3922, -0.3704, -0.2723,
        -0.3998, -0.4744, -0.4773, -0.2812, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729])
arg max of tensor([-0.3412, -0.4838, -0.3748, -0.3576, -0.5139, -0.4236, -0.4243, -0.3922,
        -0.2811, -0.2076, -0.4316, -

Tokens embeddings:
tensor([-0.3412, -0.4838, -0.3748, -0.3576, -0.5139, -0.4236, -0.4243, -0.3922,
        -0.2811, -0.2076, -0.4316, -0.3342, -0.4472, -0.5139, -0.4054, -0.2218,
        -0.2978, -0.3339, -0.3218, -0.4119, -0.3013, -0.4312, -0.5182, -0.4040,
        -0.3741, -0.4744, -0.2861, -0.1870, -0.3056, -0.3922, -0.3704, -0.2723,
        -0.3998, -0.4744, -0.4773, -0.2812, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729])
arg max of tensor([-0.3412, -0.4838, -0.3748, -0.3576, -0.5139, -0.4236, -0.4243, -0.3922,
        -0.2811, -0.2076, -0.4316, -

Tokens embeddings:
tensor([-0.3412, -0.4838, -0.3748, -0.3576, -0.5139, -0.4236, -0.4243, -0.3922,
        -0.2811, -0.2076, -0.4316, -0.3342, -0.4472, -0.5139, -0.4054, -0.2218,
        -0.2978, -0.3339, -0.3218, -0.4119, -0.3013, -0.4312, -0.5182, -0.4040,
        -0.3741, -0.4744, -0.2861, -0.1870, -0.3056, -0.3922, -0.3704, -0.2723,
        -0.3998, -0.4744, -0.4773, -0.2812, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729])
arg max of tensor([-0.3412, -0.4838, -0.3748, -0.3576, -0.5139, -0.4236, -0.4243, -0.3922,
        -0.2811, -0.2076, -0.4316, -

Tokens embeddings:
tensor([-0.3412, -0.4838, -0.3748, -0.3576, -0.5139, -0.4236, -0.4243, -0.3922,
        -0.2811, -0.2076, -0.4316, -0.3342, -0.4472, -0.5139, -0.4054, -0.2218,
        -0.2978, -0.3339, -0.3218, -0.4119, -0.3013, -0.4312, -0.5182, -0.4040,
        -0.3741, -0.4744, -0.2861, -0.1870, -0.3056, -0.3922, -0.3704, -0.2723,
        -0.3998, -0.4744, -0.4773, -0.2812, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729])
arg max of tensor([-0.3412, -0.4838, -0.3748, -0.3576, -0.5139, -0.4236, -0.4243, -0.3922,
        -0.2811, -0.2076, -0.4316, -

Tokens embeddings:
tensor([-0.3412, -0.4119, -0.4119, -0.4019, -0.4744, -0.4562, -0.4407, -0.4006,
        -0.3520, -0.2076, -0.4119, -0.4119, -0.5054, -0.3980, -0.3016, -0.2500,
        -0.3791, -0.0990, -0.4749, -0.4536, -0.2856, -0.2076, -0.4901, -0.2723,
        -0.4224, -0.4340, -0.1870, -0.2983, -0.1870, -0.3980, -0.4673, -0.4309,
        -0.4006, -0.2208, -0.2478, -0.3741, -0.4263, -0.0880, -0.2976, -0.2076,
        -0.4119, -0.4119, -0.4562, -0.2336, -0.2978, -0.4372, -0.4292, -0.0990,
        -0.4119, -0.2812, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729])
arg max of tensor([-0.3412, -0.4119, -0.4119, -0.4019, -0.4744, -0.4562, -0.4407, -0.4006,
        -0.3520, -0.2076, -0.4119, -

Tokens embeddings:
tensor([-0.3412, -0.4119, -0.4119, -0.4019, -0.4744, -0.4562, -0.4407, -0.4006,
        -0.3520, -0.2076, -0.4119, -0.4119, -0.5054, -0.3980, -0.3016, -0.2500,
        -0.3791, -0.0990, -0.4749, -0.4536, -0.2856, -0.2076, -0.4901, -0.2723,
        -0.4224, -0.4340, -0.1870, -0.2983, -0.1870, -0.3980, -0.4673, -0.4309,
        -0.4006, -0.2208, -0.2478, -0.3741, -0.4263, -0.0880, -0.2976, -0.2076,
        -0.4119, -0.4119, -0.4562, -0.2336, -0.2978, -0.4372, -0.4292, -0.0990,
        -0.4119, -0.2812, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729])
arg max of tensor([-0.3412, -0.4119, -0.4119, -0.4019, -0.4744, -0.4562, -0.4407, -0.4006,
        -0.3520, -0.2076, -0.4119, -

Tokens embeddings:
tensor([-0.3412, -0.4119, -0.4119, -0.4019, -0.4744, -0.4562, -0.4407, -0.4006,
        -0.3520, -0.2076, -0.4119, -0.4119, -0.5054, -0.3980, -0.3016, -0.2500,
        -0.3791, -0.0990, -0.4749, -0.4536, -0.2856, -0.2076, -0.4901, -0.2723,
        -0.4224, -0.4340, -0.1870, -0.2983, -0.1870, -0.3980, -0.4673, -0.4309,
        -0.4006, -0.2208, -0.2478, -0.3741, -0.4263, -0.0880, -0.2976, -0.2076,
        -0.4119, -0.4119, -0.4562, -0.2336, -0.2978, -0.4372, -0.4292, -0.0990,
        -0.4119, -0.2812, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729])
arg max of tensor([-0.3412, -0.4119, -0.4119, -0.4019, -0.4744, -0.4562, -0.4407, -0.4006,
        -0.3520, -0.2076, -0.4119, -

Tokens embeddings:
tensor([-0.3412, -0.4119, -0.4119, -0.4019, -0.4744, -0.4562, -0.4407, -0.4006,
        -0.3520, -0.2076, -0.4119, -0.4119, -0.5054, -0.3980, -0.3016, -0.2500,
        -0.3791, -0.0990, -0.4749, -0.4536, -0.2856, -0.2076, -0.4901, -0.2723,
        -0.4224, -0.4340, -0.1870, -0.2983, -0.1870, -0.3980, -0.4673, -0.4309,
        -0.4006, -0.2208, -0.2478, -0.3741, -0.4263, -0.0880, -0.2976, -0.2076,
        -0.4119, -0.4119, -0.4562, -0.2336, -0.2978, -0.4372, -0.4292, -0.0990,
        -0.4119, -0.2812, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729])
arg max of tensor([-0.3412, -0.4119, -0.4119, -0.4019, -0.4744, -0.4562, -0.4407, -0.4006,
        -0.3520, -0.2076, -0.4119, -

Tokens embeddings:
tensor([-0.3412, -0.4119, -0.4119, -0.4019, -0.4744, -0.4562, -0.4407, -0.4006,
        -0.3520, -0.2076, -0.4119, -0.4119, -0.5054, -0.3980, -0.3016, -0.2500,
        -0.3791, -0.0990, -0.4749, -0.4536, -0.2856, -0.2076, -0.4901, -0.2723,
        -0.4224, -0.4340, -0.1870, -0.2983, -0.1870, -0.3980, -0.4673, -0.4309,
        -0.4006, -0.2208, -0.2478, -0.3741, -0.4263, -0.0880, -0.2976, -0.2076,
        -0.4119, -0.4119, -0.4562, -0.2336, -0.2978, -0.4372, -0.4292, -0.0990,
        -0.4119, -0.2812, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729])
arg max of tensor([-0.3412, -0.4119, -0.4119, -0.4019, -0.4744, -0.4562, -0.4407, -0.4006,
        -0.3520, -0.2076, -0.4119, -

Tokens embeddings:
tensor([-0.3412, -0.4119, -0.4119, -0.4019, -0.4744, -0.4562, -0.4407, -0.4006,
        -0.3520, -0.2076, -0.4119, -0.4119, -0.5054, -0.3980, -0.3016, -0.2500,
        -0.3791, -0.0990, -0.4749, -0.4536, -0.2856, -0.2076, -0.4901, -0.2723,
        -0.4224, -0.4340, -0.1870, -0.2983, -0.1870, -0.3980, -0.4673, -0.4309,
        -0.4006, -0.2208, -0.2478, -0.3741, -0.4263, -0.0880, -0.2976, -0.2076,
        -0.4119, -0.4119, -0.4562, -0.2336, -0.2978, -0.4372, -0.4292, -0.0990,
        -0.4119, -0.2812, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729])
arg max of tensor([-0.3412, -0.4119, -0.4119, -0.4019, -0.4744, -0.4562, -0.4407, -0.4006,
        -0.3520, -0.2076, -0.4119, -

Tokens embeddings:
tensor([-0.3412, -0.4119, -0.4119, -0.4019, -0.4744, -0.4562, -0.4407, -0.4006,
        -0.3520, -0.2076, -0.4119, -0.4119, -0.5054, -0.3980, -0.3016, -0.2500,
        -0.3791, -0.0990, -0.4749, -0.4536, -0.2856, -0.2076, -0.4901, -0.2723,
        -0.4224, -0.4340, -0.1870, -0.2983, -0.1870, -0.3980, -0.4673, -0.4309,
        -0.4006, -0.2208, -0.2478, -0.3741, -0.4263, -0.0880, -0.2976, -0.2076,
        -0.4119, -0.4119, -0.4562, -0.2336, -0.2978, -0.4372, -0.4292, -0.0990,
        -0.4119, -0.2812, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729])
arg max of tensor([-0.3412, -0.4119, -0.4119, -0.4019, -0.4744, -0.4562, -0.4407, -0.4006,
        -0.3520, -0.2076, -0.4119, -

Tokens embeddings:
tensor([-0.3412, -0.4119, -0.4119, -0.4019, -0.4744, -0.4562, -0.4407, -0.4006,
        -0.3520, -0.2076, -0.4119, -0.4119, -0.5054, -0.3980, -0.3016, -0.2500,
        -0.3791, -0.0990, -0.4749, -0.4536, -0.2856, -0.2076, -0.4901, -0.2723,
        -0.4224, -0.4340, -0.1870, -0.2983, -0.1870, -0.3980, -0.4673, -0.4309,
        -0.4006, -0.2208, -0.2478, -0.3741, -0.4263, -0.0880, -0.2976, -0.2076,
        -0.4119, -0.4119, -0.4562, -0.2336, -0.2978, -0.4372, -0.4292, -0.0990,
        -0.4119, -0.2812, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729])
arg max of tensor([-0.3412, -0.4119, -0.4119, -0.4019, -0.4744, -0.4562, -0.4407, -0.4006,
        -0.3520, -0.2076, -0.4119, -

Tokens embeddings:
tensor([-0.3412, -0.4119, -0.4119, -0.4019, -0.4744, -0.4562, -0.4407, -0.4006,
        -0.3520, -0.2076, -0.4119, -0.4119, -0.5054, -0.3980, -0.3016, -0.2500,
        -0.3791, -0.0990, -0.4749, -0.4536, -0.2856, -0.2076, -0.4901, -0.2723,
        -0.4224, -0.4340, -0.1870, -0.2983, -0.1870, -0.3980, -0.4673, -0.4309,
        -0.4006, -0.2208, -0.2478, -0.3741, -0.4263, -0.0880, -0.2976, -0.2076,
        -0.4119, -0.4119, -0.4562, -0.2336, -0.2978, -0.4372, -0.4292, -0.0990,
        -0.4119, -0.2812, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729])
arg max of tensor([-0.3412, -0.4119, -0.4119, -0.4019, -0.4744, -0.4562, -0.4407, -0.4006,
        -0.3520, -0.2076, -0.4119, -

Tokens embeddings:
tensor([-0.3412, -0.4119, -0.4119, -0.4019, -0.4744, -0.4562, -0.4407, -0.4006,
        -0.3520, -0.2076, -0.4119, -0.4119, -0.5054, -0.3980, -0.3016, -0.2500,
        -0.3791, -0.0990, -0.4749, -0.4536, -0.2856, -0.2076, -0.4901, -0.2723,
        -0.4224, -0.4340, -0.1870, -0.2983, -0.1870, -0.3980, -0.4673, -0.4309,
        -0.4006, -0.2208, -0.2478, -0.3741, -0.4263, -0.0880, -0.2976, -0.2076,
        -0.4119, -0.4119, -0.4562, -0.2336, -0.2978, -0.4372, -0.4292, -0.0990,
        -0.4119, -0.2812, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729])
arg max of tensor([-0.3412, -0.4119, -0.4119, -0.4019, -0.4744, -0.4562, -0.4407, -0.4006,
        -0.3520, -0.2076, -0.4119, -

Tokens embeddings:
tensor([-0.3412, -0.4119, -0.4119, -0.4019, -0.4744, -0.4562, -0.4407, -0.4006,
        -0.3520, -0.2076, -0.4119, -0.4119, -0.5054, -0.3980, -0.3016, -0.2500,
        -0.3791, -0.0990, -0.4749, -0.4536, -0.2856, -0.2076, -0.4901, -0.2723,
        -0.4224, -0.4340, -0.1870, -0.2983, -0.1870, -0.3980, -0.4673, -0.4309,
        -0.4006, -0.2208, -0.2478, -0.3741, -0.4263, -0.0880, -0.2976, -0.2076,
        -0.4119, -0.4119, -0.4562, -0.2336, -0.2978, -0.4372, -0.4292, -0.0990,
        -0.4119, -0.2812, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729])
arg max of tensor([-0.3412, -0.4119, -0.4119, -0.4019, -0.4744, -0.4562, -0.4407, -0.4006,
        -0.3520, -0.2076, -0.4119, -

Tokens embeddings:
tensor([-0.3412, -0.4119, -0.4119, -0.4019, -0.4744, -0.4562, -0.4407, -0.4006,
        -0.3520, -0.2076, -0.4119, -0.4119, -0.5054, -0.3980, -0.3016, -0.2500,
        -0.3791, -0.0990, -0.4749, -0.4536, -0.2856, -0.2076, -0.4901, -0.2723,
        -0.4224, -0.4340, -0.1870, -0.2983, -0.1870, -0.3980, -0.4673, -0.4309,
        -0.4006, -0.2208, -0.2478, -0.3741, -0.4263, -0.0880, -0.2976, -0.2076,
        -0.4119, -0.4119, -0.4562, -0.2336, -0.2978, -0.4372, -0.4292, -0.0990,
        -0.4119, -0.2812, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729])
arg max of tensor([-0.3412, -0.4119, -0.4119, -0.4019, -0.4744, -0.4562, -0.4407, -0.4006,
        -0.3520, -0.2076, -0.4119, -

Tokens embeddings:
tensor([-0.3412, -0.2978, -0.4043, -0.3736, -0.3429, -0.3686, -0.4217, -0.4119,
        -0.4338, -0.5190, -0.2723, -0.5442, -0.3932, -0.3741, -0.3746, -0.3608,
        -0.2218, -0.2511, -0.2723, -0.2098, -0.2740, -0.3842, -0.4064, -0.3741,
        -0.4744, -0.3488, -0.4333, -0.4565, -0.1412, -0.2723, -0.2719, -0.3049,
        -0.3614, -0.2812, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729])
arg max of tensor([-0.3412, -0.2978, -0.4043, -0.3736, -0.3429, -0.3686, -0.4217, -0.4119,
        -0.4338, -0.5190, -0.2723, -

Tokens embeddings:
tensor([-0.3412, -0.2978, -0.4043, -0.3736, -0.3429, -0.3686, -0.4217, -0.4119,
        -0.4338, -0.5190, -0.2723, -0.5442, -0.3932, -0.3741, -0.3746, -0.3608,
        -0.2218, -0.2511, -0.2723, -0.2098, -0.2740, -0.3842, -0.4064, -0.3741,
        -0.4744, -0.3488, -0.4333, -0.4565, -0.1412, -0.2723, -0.2719, -0.3049,
        -0.3614, -0.2812, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729])
arg max of tensor([-0.3412, -0.2978, -0.4043, -0.3736, -0.3429, -0.3686, -0.4217, -0.4119,
        -0.4338, -0.5190, -0.2723, -

Tokens embeddings:
tensor([-0.3412, -0.2978, -0.4043, -0.3736, -0.3429, -0.3686, -0.4217, -0.4119,
        -0.4338, -0.5190, -0.2723, -0.5442, -0.3932, -0.3741, -0.3746, -0.3608,
        -0.2218, -0.2511, -0.2723, -0.2098, -0.2740, -0.3842, -0.4064, -0.3741,
        -0.4744, -0.3488, -0.4333, -0.4565, -0.1412, -0.2723, -0.2719, -0.3049,
        -0.3614, -0.2812, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729])
arg max of tensor([-0.3412, -0.2978, -0.4043, -0.3736, -0.3429, -0.3686, -0.4217, -0.4119,
        -0.4338, -0.5190, -0.2723, -

Tokens embeddings:
tensor([-0.3412, -0.2978, -0.4043, -0.3736, -0.3429, -0.3686, -0.4217, -0.4119,
        -0.4338, -0.5190, -0.2723, -0.5442, -0.3932, -0.3741, -0.3746, -0.3608,
        -0.2218, -0.2511, -0.2723, -0.2098, -0.2740, -0.3842, -0.4064, -0.3741,
        -0.4744, -0.3488, -0.4333, -0.4565, -0.1412, -0.2723, -0.2719, -0.3049,
        -0.3614, -0.2812, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729])
arg max of tensor([-0.3412, -0.2978, -0.4043, -0.3736, -0.3429, -0.3686, -0.4217, -0.4119,
        -0.4338, -0.5190, -0.2723, -

Tokens embeddings:
tensor([-0.3412, -0.2978, -0.4043, -0.3736, -0.3429, -0.3686, -0.4217, -0.4119,
        -0.4338, -0.5190, -0.2723, -0.5442, -0.3932, -0.3741, -0.3746, -0.3608,
        -0.2218, -0.2511, -0.2723, -0.2098, -0.2740, -0.3842, -0.4064, -0.3741,
        -0.4744, -0.3488, -0.4333, -0.4565, -0.1412, -0.2723, -0.2719, -0.3049,
        -0.3614, -0.2812, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729])
arg max of tensor([-0.3412, -0.2978, -0.4043, -0.3736, -0.3429, -0.3686, -0.4217, -0.4119,
        -0.4338, -0.5190, -0.2723, -

Tokens embeddings:
tensor([-0.3412, -0.2978, -0.4043, -0.3736, -0.3429, -0.3686, -0.4217, -0.4119,
        -0.4338, -0.5190, -0.2723, -0.5442, -0.3932, -0.3741, -0.3746, -0.3608,
        -0.2218, -0.2511, -0.2723, -0.2098, -0.2740, -0.3842, -0.4064, -0.3741,
        -0.4744, -0.3488, -0.4333, -0.4565, -0.1412, -0.2723, -0.2719, -0.3049,
        -0.3614, -0.2812, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729])
arg max of tensor([-0.3412, -0.2978, -0.4043, -0.3736, -0.3429, -0.3686, -0.4217, -0.4119,
        -0.4338, -0.5190, -0.2723, -

Tokens embeddings:
tensor([-0.3412, -0.2978, -0.4043, -0.3736, -0.3429, -0.3686, -0.4217, -0.4119,
        -0.4338, -0.5190, -0.2723, -0.5442, -0.3932, -0.3741, -0.3746, -0.3608,
        -0.2218, -0.2511, -0.2723, -0.2098, -0.2740, -0.3842, -0.4064, -0.3741,
        -0.4744, -0.3488, -0.4333, -0.4565, -0.1412, -0.2723, -0.2719, -0.3049,
        -0.3614, -0.2812, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729])
arg max of tensor([-0.3412, -0.2978, -0.4043, -0.3736, -0.3429, -0.3686, -0.4217, -0.4119,
        -0.4338, -0.5190, -0.2723, -

Tokens embeddings:
tensor([-0.3412, -0.2978, -0.4043, -0.3736, -0.3429, -0.3686, -0.4217, -0.4119,
        -0.4338, -0.5190, -0.2723, -0.5442, -0.3932, -0.3741, -0.3746, -0.3608,
        -0.2218, -0.2511, -0.2723, -0.2098, -0.2740, -0.3842, -0.4064, -0.3741,
        -0.4744, -0.3488, -0.4333, -0.4565, -0.1412, -0.2723, -0.2719, -0.3049,
        -0.3614, -0.2812, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729])
arg max of tensor([-0.3412, -0.2978, -0.4043, -0.3736, -0.3429, -0.3686, -0.4217, -0.4119,
        -0.4338, -0.5190, -0.2723, -

Tokens embeddings:
tensor([-0.3412, -0.2978, -0.4043, -0.3736, -0.3429, -0.3686, -0.4217, -0.4119,
        -0.4338, -0.5190, -0.2723, -0.5442, -0.3932, -0.3741, -0.3746, -0.3608,
        -0.2218, -0.2511, -0.2723, -0.2098, -0.2740, -0.3842, -0.4064, -0.3741,
        -0.4744, -0.3488, -0.4333, -0.4565, -0.1412, -0.2723, -0.2719, -0.3049,
        -0.3614, -0.2812, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729])
arg max of tensor([-0.3412, -0.2978, -0.4043, -0.3736, -0.3429, -0.3686, -0.4217, -0.4119,
        -0.4338, -0.5190, -0.2723, -

Tokens embeddings:
tensor([-0.3412, -0.2978, -0.4043, -0.3736, -0.3429, -0.3686, -0.4217, -0.4119,
        -0.4338, -0.5190, -0.2723, -0.5442, -0.3932, -0.3741, -0.3746, -0.3608,
        -0.2218, -0.2511, -0.2723, -0.2098, -0.2740, -0.3842, -0.4064, -0.3741,
        -0.4744, -0.3488, -0.4333, -0.4565, -0.1412, -0.2723, -0.2719, -0.3049,
        -0.3614, -0.2812, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729])
arg max of tensor([-0.3412, -0.2978, -0.4043, -0.3736, -0.3429, -0.3686, -0.4217, -0.4119,
        -0.4338, -0.5190, -0.2723, -

Tokens embeddings:
tensor([-0.3412, -0.2978, -0.4043, -0.3736, -0.3429, -0.3686, -0.4217, -0.4119,
        -0.4338, -0.5190, -0.2723, -0.5442, -0.3932, -0.3741, -0.3746, -0.3608,
        -0.2218, -0.2511, -0.2723, -0.2098, -0.2740, -0.3842, -0.4064, -0.3741,
        -0.4744, -0.3488, -0.4333, -0.4565, -0.1412, -0.2723, -0.2719, -0.3049,
        -0.3614, -0.2812, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729])
arg max of tensor([-0.3412, -0.2978, -0.4043, -0.3736, -0.3429, -0.3686, -0.4217, -0.4119,
        -0.4338, -0.5190, -0.2723, -

Tokens embeddings:
tensor([-0.3412, -0.2978, -0.4043, -0.3736, -0.3429, -0.3686, -0.4217, -0.4119,
        -0.4338, -0.5190, -0.2723, -0.5442, -0.3932, -0.3741, -0.3746, -0.3608,
        -0.2218, -0.2511, -0.2723, -0.2098, -0.2740, -0.3842, -0.4064, -0.3741,
        -0.4744, -0.3488, -0.4333, -0.4565, -0.1412, -0.2723, -0.2719, -0.3049,
        -0.3614, -0.2812, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729])
arg max of tensor([-0.3412, -0.2978, -0.4043, -0.3736, -0.3429, -0.3686, -0.4217, -0.4119,
        -0.4338, -0.5190, -0.2723, -

Tokens embeddings:
tensor([-0.3412, -0.4119, -0.4119, -0.3437, -0.5139, -0.4190, -0.2651, -0.2785,
        -0.2247, -0.4894, -0.1519, -0.4054, -0.3218, -0.4119, -0.3013, -0.4550,
        -0.3200, -0.0990, -0.4119, -0.2812, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729])
arg max of tensor([-0.3412, -0.4119, -0.4119, -0.3437, -0.5139, -0.4190, -0.2651, -0.2785,
        -0.2247, -0.4894, -0.1519, -

Tokens embeddings:
tensor([-0.3412, -0.4119, -0.4119, -0.3437, -0.5139, -0.4190, -0.2651, -0.2785,
        -0.2247, -0.4894, -0.1519, -0.4054, -0.3218, -0.4119, -0.3013, -0.4550,
        -0.3200, -0.0990, -0.4119, -0.2812, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729])
arg max of tensor([-0.3412, -0.4119, -0.4119, -0.3437, -0.5139, -0.4190, -0.2651, -0.2785,
        -0.2247, -0.4894, -0.1519, -

Tokens embeddings:
tensor([-0.3412, -0.4119, -0.4119, -0.3437, -0.5139, -0.4190, -0.2651, -0.2785,
        -0.2247, -0.4894, -0.1519, -0.4054, -0.3218, -0.4119, -0.3013, -0.4550,
        -0.3200, -0.0990, -0.4119, -0.2812, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729])
arg max of tensor([-0.3412, -0.4119, -0.4119, -0.3437, -0.5139, -0.4190, -0.2651, -0.2785,
        -0.2247, -0.4894, -0.1519, -

Tokens embeddings:
tensor([-0.3412, -0.4119, -0.4119, -0.3437, -0.5139, -0.4190, -0.2651, -0.2785,
        -0.2247, -0.4894, -0.1519, -0.4054, -0.3218, -0.4119, -0.3013, -0.4550,
        -0.3200, -0.0990, -0.4119, -0.2812, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729])
arg max of tensor([-0.3412, -0.4119, -0.4119, -0.3437, -0.5139, -0.4190, -0.2651, -0.2785,
        -0.2247, -0.4894, -0.1519, -

Tokens embeddings:
tensor([-0.3412, -0.4119, -0.4119, -0.3437, -0.5139, -0.4190, -0.2651, -0.2785,
        -0.2247, -0.4894, -0.1519, -0.4054, -0.3218, -0.4119, -0.3013, -0.4550,
        -0.3200, -0.0990, -0.4119, -0.2812, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729])
arg max of tensor([-0.3412, -0.4119, -0.4119, -0.3437, -0.5139, -0.4190, -0.2651, -0.2785,
        -0.2247, -0.4894, -0.1519, -

Tokens embeddings:
tensor([-0.3412, -0.4119, -0.4119, -0.3437, -0.5139, -0.4190, -0.2651, -0.2785,
        -0.2247, -0.4894, -0.1519, -0.4054, -0.3218, -0.4119, -0.3013, -0.4550,
        -0.3200, -0.0990, -0.4119, -0.2812, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729])
arg max of tensor([-0.3412, -0.4119, -0.4119, -0.3437, -0.5139, -0.4190, -0.2651, -0.2785,
        -0.2247, -0.4894, -0.1519, -

Tokens embeddings:
tensor([-0.3412, -0.4119, -0.4119, -0.3437, -0.5139, -0.4190, -0.2651, -0.2785,
        -0.2247, -0.4894, -0.1519, -0.4054, -0.3218, -0.4119, -0.3013, -0.4550,
        -0.3200, -0.0990, -0.4119, -0.2812, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729])
arg max of tensor([-0.3412, -0.4119, -0.4119, -0.3437, -0.5139, -0.4190, -0.2651, -0.2785,
        -0.2247, -0.4894, -0.1519, -

Tokens embeddings:
tensor([-0.3412, -0.4119, -0.4119, -0.3437, -0.5139, -0.4190, -0.2651, -0.2785,
        -0.2247, -0.4894, -0.1519, -0.4054, -0.3218, -0.4119, -0.3013, -0.4550,
        -0.3200, -0.0990, -0.4119, -0.2812, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729])
arg max of tensor([-0.3412, -0.4119, -0.4119, -0.3437, -0.5139, -0.4190, -0.2651, -0.2785,
        -0.2247, -0.4894, -0.1519, -

Tokens embeddings:
tensor([-0.3412, -0.4119, -0.4119, -0.3437, -0.5139, -0.4190, -0.2651, -0.2785,
        -0.2247, -0.4894, -0.1519, -0.4054, -0.3218, -0.4119, -0.3013, -0.4550,
        -0.3200, -0.0990, -0.4119, -0.2812, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729])
arg max of tensor([-0.3412, -0.4119, -0.4119, -0.3437, -0.5139, -0.4190, -0.2651, -0.2785,
        -0.2247, -0.4894, -0.1519, -

Tokens embeddings:
tensor([-0.3412, -0.4119, -0.4119, -0.3437, -0.5139, -0.4190, -0.2651, -0.2785,
        -0.2247, -0.4894, -0.1519, -0.4054, -0.3218, -0.4119, -0.3013, -0.4550,
        -0.3200, -0.0990, -0.4119, -0.2812, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729])
arg max of tensor([-0.3412, -0.4119, -0.4119, -0.3437, -0.5139, -0.4190, -0.2651, -0.2785,
        -0.2247, -0.4894, -0.1519, -

Tokens embeddings:
tensor([-0.3412, -0.4119, -0.4119, -0.3437, -0.5139, -0.4190, -0.2651, -0.2785,
        -0.2247, -0.4894, -0.1519, -0.4054, -0.3218, -0.4119, -0.3013, -0.4550,
        -0.3200, -0.0990, -0.4119, -0.2812, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729])
arg max of tensor([-0.3412, -0.4119, -0.4119, -0.3437, -0.5139, -0.4190, -0.2651, -0.2785,
        -0.2247, -0.4894, -0.1519, -

Tokens embeddings:
tensor([-0.3412, -0.4119, -0.4119, -0.3437, -0.5139, -0.4190, -0.2651, -0.2785,
        -0.2247, -0.4894, -0.1519, -0.4054, -0.3218, -0.4119, -0.3013, -0.4550,
        -0.3200, -0.0990, -0.4119, -0.2812, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729])
arg max of tensor([-0.3412, -0.4119, -0.4119, -0.3437, -0.5139, -0.4190, -0.2651, -0.2785,
        -0.2247, -0.4894, -0.1519, -

Tokens embeddings:
tensor([-0.3412, -0.2978, -0.4119, -0.4338, -0.4724, -0.4395, -0.2076, -0.2581,
        -0.2076, -0.2218, -0.3637, -0.5139, -0.3164, -0.2723, -0.4379, -0.3676,
        -0.4744, -0.4347, -0.3842, -0.4581, -0.4249, -0.2002, -0.4378, -0.2076,
        -0.5445, -0.2002, -0.3827, -0.2076, -0.5098, -0.3747, -0.3665, -0.3731,
        -0.3226, -0.3528, -0.4947, -0.1547, -0.3106, -0.2475, -0.2076, -0.4224,
        -0.5212, -0.3661, -0.2921, -0.4224, -0.2627, -0.0485, -0.2213, -0.3764,
        -0.3920, -0.4724, -0.4041, -0.2656, -0.3741, -0.3268, -0.3865, -0.2723,
        -0.3754, -0.4494, -0.4109, -0.3111, -0.3842, -0.3140, -0.1870, -0.1870,
        -0.4119, -0.4119, -0.3780, -0.2265, -0.2076, -0.4119, -0.4119, -0.4357,
        -0.3727, -0.3680, -0.2723, -0.4285, -0.2978, -0.1870, -0.1870, -0.2910,
        -0.4744, -0.4329, -0.3842, -0.4992, -0.2812])
arg max of tensor([-0.3412, -0.2978, -0.4119, -0.4338, -0.4724, -0.4395, -0.2076, -0.2581,
        -0.2076, -0.2218, -0.3637, -

Tokens embeddings:
tensor([-0.3412, -0.2978, -0.4119, -0.4338, -0.4724, -0.4395, -0.2076, -0.2581,
        -0.2076, -0.2218, -0.3637, -0.5139, -0.3164, -0.2723, -0.4379, -0.3676,
        -0.4744, -0.4347, -0.3842, -0.4581, -0.4249, -0.2002, -0.4378, -0.2076,
        -0.5445, -0.2002, -0.3827, -0.2076, -0.5098, -0.3747, -0.3665, -0.3731,
        -0.3226, -0.3528, -0.4947, -0.1547, -0.3106, -0.2475, -0.2076, -0.4224,
        -0.5212, -0.3661, -0.2921, -0.4224, -0.2627, -0.0485, -0.2213, -0.3764,
        -0.3920, -0.4724, -0.4041, -0.2656, -0.3741, -0.3268, -0.3865, -0.2723,
        -0.3754, -0.4494, -0.4109, -0.3111, -0.3842, -0.3140, -0.1870, -0.1870,
        -0.4119, -0.4119, -0.3780, -0.2265, -0.2076, -0.4119, -0.4119, -0.4357,
        -0.3727, -0.3680, -0.2723, -0.4285, -0.2978, -0.1870, -0.1870, -0.2910,
        -0.4744, -0.4329, -0.3842, -0.4992, -0.2812])
arg max of tensor([-0.3412, -0.2978, -0.4119, -0.4338, -0.4724, -0.4395, -0.2076, -0.2581,
        -0.2076, -0.2218, -0.3637, -

Tokens embeddings:
tensor([-0.3412, -0.2978, -0.4119, -0.4338, -0.4724, -0.4395, -0.2076, -0.2581,
        -0.2076, -0.2218, -0.3637, -0.5139, -0.3164, -0.2723, -0.4379, -0.3676,
        -0.4744, -0.4347, -0.3842, -0.4581, -0.4249, -0.2002, -0.4378, -0.2076,
        -0.5445, -0.2002, -0.3827, -0.2076, -0.5098, -0.3747, -0.3665, -0.3731,
        -0.3226, -0.3528, -0.4947, -0.1547, -0.3106, -0.2475, -0.2076, -0.4224,
        -0.5212, -0.3661, -0.2921, -0.4224, -0.2627, -0.0485, -0.2213, -0.3764,
        -0.3920, -0.4724, -0.4041, -0.2656, -0.3741, -0.3268, -0.3865, -0.2723,
        -0.3754, -0.4494, -0.4109, -0.3111, -0.3842, -0.3140, -0.1870, -0.1870,
        -0.4119, -0.4119, -0.3780, -0.2265, -0.2076, -0.4119, -0.4119, -0.4357,
        -0.3727, -0.3680, -0.2723, -0.4285, -0.2978, -0.1870, -0.1870, -0.2910,
        -0.4744, -0.4329, -0.3842, -0.4992, -0.2812])
arg max of tensor([-0.3412, -0.2978, -0.4119, -0.4338, -0.4724, -0.4395, -0.2076, -0.2581,
        -0.2076, -0.2218, -0.3637, -

Tokens embeddings:
tensor([-0.3412, -0.2978, -0.4119, -0.4338, -0.4724, -0.4395, -0.2076, -0.2581,
        -0.2076, -0.2218, -0.3637, -0.5139, -0.3164, -0.2723, -0.4379, -0.3676,
        -0.4744, -0.4347, -0.3842, -0.4581, -0.4249, -0.2002, -0.4378, -0.2076,
        -0.5445, -0.2002, -0.3827, -0.2076, -0.5098, -0.3747, -0.3665, -0.3731,
        -0.3226, -0.3528, -0.4947, -0.1547, -0.3106, -0.2475, -0.2076, -0.4224,
        -0.5212, -0.3661, -0.2921, -0.4224, -0.2627, -0.0485, -0.2213, -0.3764,
        -0.3920, -0.4724, -0.4041, -0.2656, -0.3741, -0.3268, -0.3865, -0.2723,
        -0.3754, -0.4494, -0.4109, -0.3111, -0.3842, -0.3140, -0.1870, -0.1870,
        -0.4119, -0.4119, -0.3780, -0.2265, -0.2076, -0.4119, -0.4119, -0.4357,
        -0.3727, -0.3680, -0.2723, -0.4285, -0.2978, -0.1870, -0.1870, -0.2910,
        -0.4744, -0.4329, -0.3842, -0.4992, -0.2812])
arg max of tensor([-0.3412, -0.2978, -0.4119, -0.4338, -0.4724, -0.4395, -0.2076, -0.2581,
        -0.2076, -0.2218, -0.3637, -

Tokens embeddings:
tensor([-0.3412, -0.2978, -0.4119, -0.4338, -0.4724, -0.4395, -0.2076, -0.2581,
        -0.2076, -0.2218, -0.3637, -0.5139, -0.3164, -0.2723, -0.4379, -0.3676,
        -0.4744, -0.4347, -0.3842, -0.4581, -0.4249, -0.2002, -0.4378, -0.2076,
        -0.5445, -0.2002, -0.3827, -0.2076, -0.5098, -0.3747, -0.3665, -0.3731,
        -0.3226, -0.3528, -0.4947, -0.1547, -0.3106, -0.2475, -0.2076, -0.4224,
        -0.5212, -0.3661, -0.2921, -0.4224, -0.2627, -0.0485, -0.2213, -0.3764,
        -0.3920, -0.4724, -0.4041, -0.2656, -0.3741, -0.3268, -0.3865, -0.2723,
        -0.3754, -0.4494, -0.4109, -0.3111, -0.3842, -0.3140, -0.1870, -0.1870,
        -0.4119, -0.4119, -0.3780, -0.2265, -0.2076, -0.4119, -0.4119, -0.4357,
        -0.3727, -0.3680, -0.2723, -0.4285, -0.2978, -0.1870, -0.1870, -0.2910,
        -0.4744, -0.4329, -0.3842, -0.4992, -0.2812])
arg max of tensor([-0.3412, -0.2978, -0.4119, -0.4338, -0.4724, -0.4395, -0.2076, -0.2581,
        -0.2076, -0.2218, -0.3637, -

Tokens embeddings:
tensor([-0.3412, -0.2978, -0.4119, -0.4338, -0.4724, -0.4395, -0.2076, -0.2581,
        -0.2076, -0.2218, -0.3637, -0.5139, -0.3164, -0.2723, -0.4379, -0.3676,
        -0.4744, -0.4347, -0.3842, -0.4581, -0.4249, -0.2002, -0.4378, -0.2076,
        -0.5445, -0.2002, -0.3827, -0.2076, -0.5098, -0.3747, -0.3665, -0.3731,
        -0.3226, -0.3528, -0.4947, -0.1547, -0.3106, -0.2475, -0.2076, -0.4224,
        -0.5212, -0.3661, -0.2921, -0.4224, -0.2627, -0.0485, -0.2213, -0.3764,
        -0.3920, -0.4724, -0.4041, -0.2656, -0.3741, -0.3268, -0.3865, -0.2723,
        -0.3754, -0.4494, -0.4109, -0.3111, -0.3842, -0.3140, -0.1870, -0.1870,
        -0.4119, -0.4119, -0.3780, -0.2265, -0.2076, -0.4119, -0.4119, -0.4357,
        -0.3727, -0.3680, -0.2723, -0.4285, -0.2978, -0.1870, -0.1870, -0.2910,
        -0.4744, -0.4329, -0.3842, -0.4992, -0.2812])
arg max of tensor([-0.3412, -0.2978, -0.4119, -0.4338, -0.4724, -0.4395, -0.2076, -0.2581,
        -0.2076, -0.2218, -0.3637, -

Tokens embeddings:
tensor([-0.3412, -0.2978, -0.4119, -0.4338, -0.4724, -0.4395, -0.2076, -0.2581,
        -0.2076, -0.2218, -0.3637, -0.5139, -0.3164, -0.2723, -0.4379, -0.3676,
        -0.4744, -0.4347, -0.3842, -0.4581, -0.4249, -0.2002, -0.4378, -0.2076,
        -0.5445, -0.2002, -0.3827, -0.2076, -0.5098, -0.3747, -0.3665, -0.3731,
        -0.3226, -0.3528, -0.4947, -0.1547, -0.3106, -0.2475, -0.2076, -0.4224,
        -0.5212, -0.3661, -0.2921, -0.4224, -0.2627, -0.0485, -0.2213, -0.3764,
        -0.3920, -0.4724, -0.4041, -0.2656, -0.3741, -0.3268, -0.3865, -0.2723,
        -0.3754, -0.4494, -0.4109, -0.3111, -0.3842, -0.3140, -0.1870, -0.1870,
        -0.4119, -0.4119, -0.3780, -0.2265, -0.2076, -0.4119, -0.4119, -0.4357,
        -0.3727, -0.3680, -0.2723, -0.4285, -0.2978, -0.1870, -0.1870, -0.2910,
        -0.4744, -0.4329, -0.3842, -0.4992, -0.2812])
arg max of tensor([-0.3412, -0.2978, -0.4119, -0.4338, -0.4724, -0.4395, -0.2076, -0.2581,
        -0.2076, -0.2218, -0.3637, -

Tokens embeddings:
tensor([-0.3412, -0.2978, -0.4119, -0.4338, -0.4724, -0.4395, -0.2076, -0.2581,
        -0.2076, -0.2218, -0.3637, -0.5139, -0.3164, -0.2723, -0.4379, -0.3676,
        -0.4744, -0.4347, -0.3842, -0.4581, -0.4249, -0.2002, -0.4378, -0.2076,
        -0.5445, -0.2002, -0.3827, -0.2076, -0.5098, -0.3747, -0.3665, -0.3731,
        -0.3226, -0.3528, -0.4947, -0.1547, -0.3106, -0.2475, -0.2076, -0.4224,
        -0.5212, -0.3661, -0.2921, -0.4224, -0.2627, -0.0485, -0.2213, -0.3764,
        -0.3920, -0.4724, -0.4041, -0.2656, -0.3741, -0.3268, -0.3865, -0.2723,
        -0.3754, -0.4494, -0.4109, -0.3111, -0.3842, -0.3140, -0.1870, -0.1870,
        -0.4119, -0.4119, -0.3780, -0.2265, -0.2076, -0.4119, -0.4119, -0.4357,
        -0.3727, -0.3680, -0.2723, -0.4285, -0.2978, -0.1870, -0.1870, -0.2910,
        -0.4744, -0.4329, -0.3842, -0.4992, -0.2812])
arg max of tensor([-0.3412, -0.2978, -0.4119, -0.4338, -0.4724, -0.4395, -0.2076, -0.2581,
        -0.2076, -0.2218, -0.3637, -

Tokens embeddings:
tensor([-0.3412, -0.2978, -0.4119, -0.4338, -0.4724, -0.4395, -0.2076, -0.2581,
        -0.2076, -0.2218, -0.3637, -0.5139, -0.3164, -0.2723, -0.4379, -0.3676,
        -0.4744, -0.4347, -0.3842, -0.4581, -0.4249, -0.2002, -0.4378, -0.2076,
        -0.5445, -0.2002, -0.3827, -0.2076, -0.5098, -0.3747, -0.3665, -0.3731,
        -0.3226, -0.3528, -0.4947, -0.1547, -0.3106, -0.2475, -0.2076, -0.4224,
        -0.5212, -0.3661, -0.2921, -0.4224, -0.2627, -0.0485, -0.2213, -0.3764,
        -0.3920, -0.4724, -0.4041, -0.2656, -0.3741, -0.3268, -0.3865, -0.2723,
        -0.3754, -0.4494, -0.4109, -0.3111, -0.3842, -0.3140, -0.1870, -0.1870,
        -0.4119, -0.4119, -0.3780, -0.2265, -0.2076, -0.4119, -0.4119, -0.4357,
        -0.3727, -0.3680, -0.2723, -0.4285, -0.2978, -0.1870, -0.1870, -0.2910,
        -0.4744, -0.4329, -0.3842, -0.4992, -0.2812])
arg max of tensor([-0.3412, -0.2978, -0.4119, -0.4338, -0.4724, -0.4395, -0.2076, -0.2581,
        -0.2076, -0.2218, -0.3637, -

Tokens embeddings:
tensor([-0.3412, -0.2978, -0.4119, -0.4338, -0.4724, -0.4395, -0.2076, -0.2581,
        -0.2076, -0.2218, -0.3637, -0.5139, -0.3164, -0.2723, -0.4379, -0.3676,
        -0.4744, -0.4347, -0.3842, -0.4581, -0.4249, -0.2002, -0.4378, -0.2076,
        -0.5445, -0.2002, -0.3827, -0.2076, -0.5098, -0.3747, -0.3665, -0.3731,
        -0.3226, -0.3528, -0.4947, -0.1547, -0.3106, -0.2475, -0.2076, -0.4224,
        -0.5212, -0.3661, -0.2921, -0.4224, -0.2627, -0.0485, -0.2213, -0.3764,
        -0.3920, -0.4724, -0.4041, -0.2656, -0.3741, -0.3268, -0.3865, -0.2723,
        -0.3754, -0.4494, -0.4109, -0.3111, -0.3842, -0.3140, -0.1870, -0.1870,
        -0.4119, -0.4119, -0.3780, -0.2265, -0.2076, -0.4119, -0.4119, -0.4357,
        -0.3727, -0.3680, -0.2723, -0.4285, -0.2978, -0.1870, -0.1870, -0.2910,
        -0.4744, -0.4329, -0.3842, -0.4992, -0.2812])
arg max of tensor([-0.3412, -0.2978, -0.4119, -0.4338, -0.4724, -0.4395, -0.2076, -0.2581,
        -0.2076, -0.2218, -0.3637, -

Tokens embeddings:
tensor([-0.3412, -0.2978, -0.4119, -0.4338, -0.4724, -0.4395, -0.2076, -0.2581,
        -0.2076, -0.2218, -0.3637, -0.5139, -0.3164, -0.2723, -0.4379, -0.3676,
        -0.4744, -0.4347, -0.3842, -0.4581, -0.4249, -0.2002, -0.4378, -0.2076,
        -0.5445, -0.2002, -0.3827, -0.2076, -0.5098, -0.3747, -0.3665, -0.3731,
        -0.3226, -0.3528, -0.4947, -0.1547, -0.3106, -0.2475, -0.2076, -0.4224,
        -0.5212, -0.3661, -0.2921, -0.4224, -0.2627, -0.0485, -0.2213, -0.3764,
        -0.3920, -0.4724, -0.4041, -0.2656, -0.3741, -0.3268, -0.3865, -0.2723,
        -0.3754, -0.4494, -0.4109, -0.3111, -0.3842, -0.3140, -0.1870, -0.1870,
        -0.4119, -0.4119, -0.3780, -0.2265, -0.2076, -0.4119, -0.4119, -0.4357,
        -0.3727, -0.3680, -0.2723, -0.4285, -0.2978, -0.1870, -0.1870, -0.2910,
        -0.4744, -0.4329, -0.3842, -0.4992, -0.2812])
arg max of tensor([-0.3412, -0.2978, -0.4119, -0.4338, -0.4724, -0.4395, -0.2076, -0.2581,
        -0.2076, -0.2218, -0.3637, -

Tokens embeddings:
tensor([-0.3412, -0.2978, -0.4119, -0.4338, -0.4724, -0.4395, -0.2076, -0.2581,
        -0.2076, -0.2218, -0.3637, -0.5139, -0.3164, -0.2723, -0.4379, -0.3676,
        -0.4744, -0.4347, -0.3842, -0.4581, -0.4249, -0.2002, -0.4378, -0.2076,
        -0.5445, -0.2002, -0.3827, -0.2076, -0.5098, -0.3747, -0.3665, -0.3731,
        -0.3226, -0.3528, -0.4947, -0.1547, -0.3106, -0.2475, -0.2076, -0.4224,
        -0.5212, -0.3661, -0.2921, -0.4224, -0.2627, -0.0485, -0.2213, -0.3764,
        -0.3920, -0.4724, -0.4041, -0.2656, -0.3741, -0.3268, -0.3865, -0.2723,
        -0.3754, -0.4494, -0.4109, -0.3111, -0.3842, -0.3140, -0.1870, -0.1870,
        -0.4119, -0.4119, -0.3780, -0.2265, -0.2076, -0.4119, -0.4119, -0.4357,
        -0.3727, -0.3680, -0.2723, -0.4285, -0.2978, -0.1870, -0.1870, -0.2910,
        -0.4744, -0.4329, -0.3842, -0.4992, -0.2812])
arg max of tensor([-0.3412, -0.2978, -0.4119, -0.4338, -0.4724, -0.4395, -0.2076, -0.2581,
        -0.2076, -0.2218, -0.3637, -

Tokens embeddings:
tensor([-0.3412, -0.4224, -0.4532, -0.3757, -0.2567, -0.2770, -0.4074, -0.2076,
        -0.4058, -0.2218, -0.4744, -0.2399, -0.3865, -0.3218, -0.4119, -0.3013,
        -0.4646, -0.3673, -0.4357, -0.4727, -0.4357, -0.4215, -0.4093, -0.3432,
        -0.3747, -0.2076, -0.4389, -0.3660, -0.4224, -0.3597, -0.1956, -0.3741,
        -0.3933, -0.4807, -0.3210, -0.2076, -0.3727, -0.4372, -0.2812, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729])
arg max of tensor([-0.3412, -0.4224, -0.4532, -0.3757, -0.2567, -0.2770, -0.4074, -0.2076,
        -0.4058, -0.2218, -0.4744, -

Tokens embeddings:
tensor([-0.3412, -0.4224, -0.4532, -0.3757, -0.2567, -0.2770, -0.4074, -0.2076,
        -0.4058, -0.2218, -0.4744, -0.2399, -0.3865, -0.3218, -0.4119, -0.3013,
        -0.4646, -0.3673, -0.4357, -0.4727, -0.4357, -0.4215, -0.4093, -0.3432,
        -0.3747, -0.2076, -0.4389, -0.3660, -0.4224, -0.3597, -0.1956, -0.3741,
        -0.3933, -0.4807, -0.3210, -0.2076, -0.3727, -0.4372, -0.2812, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729])
arg max of tensor([-0.3412, -0.4224, -0.4532, -0.3757, -0.2567, -0.2770, -0.4074, -0.2076,
        -0.4058, -0.2218, -0.4744, -

Tokens embeddings:
tensor([-0.3412, -0.4224, -0.4532, -0.3757, -0.2567, -0.2770, -0.4074, -0.2076,
        -0.4058, -0.2218, -0.4744, -0.2399, -0.3865, -0.3218, -0.4119, -0.3013,
        -0.4646, -0.3673, -0.4357, -0.4727, -0.4357, -0.4215, -0.4093, -0.3432,
        -0.3747, -0.2076, -0.4389, -0.3660, -0.4224, -0.3597, -0.1956, -0.3741,
        -0.3933, -0.4807, -0.3210, -0.2076, -0.3727, -0.4372, -0.2812, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729])
arg max of tensor([-0.3412, -0.4224, -0.4532, -0.3757, -0.2567, -0.2770, -0.4074, -0.2076,
        -0.4058, -0.2218, -0.4744, -

Tokens embeddings:
tensor([-0.3412, -0.4224, -0.4532, -0.3757, -0.2567, -0.2770, -0.4074, -0.2076,
        -0.4058, -0.2218, -0.4744, -0.2399, -0.3865, -0.3218, -0.4119, -0.3013,
        -0.4646, -0.3673, -0.4357, -0.4727, -0.4357, -0.4215, -0.4093, -0.3432,
        -0.3747, -0.2076, -0.4389, -0.3660, -0.4224, -0.3597, -0.1956, -0.3741,
        -0.3933, -0.4807, -0.3210, -0.2076, -0.3727, -0.4372, -0.2812, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729])
arg max of tensor([-0.3412, -0.4224, -0.4532, -0.3757, -0.2567, -0.2770, -0.4074, -0.2076,
        -0.4058, -0.2218, -0.4744, -

Tokens embeddings:
tensor([-0.3412, -0.4224, -0.4532, -0.3757, -0.2567, -0.2770, -0.4074, -0.2076,
        -0.4058, -0.2218, -0.4744, -0.2399, -0.3865, -0.3218, -0.4119, -0.3013,
        -0.4646, -0.3673, -0.4357, -0.4727, -0.4357, -0.4215, -0.4093, -0.3432,
        -0.3747, -0.2076, -0.4389, -0.3660, -0.4224, -0.3597, -0.1956, -0.3741,
        -0.3933, -0.4807, -0.3210, -0.2076, -0.3727, -0.4372, -0.2812, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729])
arg max of tensor([-0.3412, -0.4224, -0.4532, -0.3757, -0.2567, -0.2770, -0.4074, -0.2076,
        -0.4058, -0.2218, -0.4744, -

Tokens embeddings:
tensor([-0.3412, -0.4224, -0.4532, -0.3757, -0.2567, -0.2770, -0.4074, -0.2076,
        -0.4058, -0.2218, -0.4744, -0.2399, -0.3865, -0.3218, -0.4119, -0.3013,
        -0.4646, -0.3673, -0.4357, -0.4727, -0.4357, -0.4215, -0.4093, -0.3432,
        -0.3747, -0.2076, -0.4389, -0.3660, -0.4224, -0.3597, -0.1956, -0.3741,
        -0.3933, -0.4807, -0.3210, -0.2076, -0.3727, -0.4372, -0.2812, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729])
arg max of tensor([-0.3412, -0.4224, -0.4532, -0.3757, -0.2567, -0.2770, -0.4074, -0.2076,
        -0.4058, -0.2218, -0.4744, -

Tokens embeddings:
tensor([-0.3412, -0.4224, -0.4532, -0.3757, -0.2567, -0.2770, -0.4074, -0.2076,
        -0.4058, -0.2218, -0.4744, -0.2399, -0.3865, -0.3218, -0.4119, -0.3013,
        -0.4646, -0.3673, -0.4357, -0.4727, -0.4357, -0.4215, -0.4093, -0.3432,
        -0.3747, -0.2076, -0.4389, -0.3660, -0.4224, -0.3597, -0.1956, -0.3741,
        -0.3933, -0.4807, -0.3210, -0.2076, -0.3727, -0.4372, -0.2812, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729])
arg max of tensor([-0.3412, -0.4224, -0.4532, -0.3757, -0.2567, -0.2770, -0.4074, -0.2076,
        -0.4058, -0.2218, -0.4744, -

Tokens embeddings:
tensor([-0.3412, -0.4224, -0.4532, -0.3757, -0.2567, -0.2770, -0.4074, -0.2076,
        -0.4058, -0.2218, -0.4744, -0.2399, -0.3865, -0.3218, -0.4119, -0.3013,
        -0.4646, -0.3673, -0.4357, -0.4727, -0.4357, -0.4215, -0.4093, -0.3432,
        -0.3747, -0.2076, -0.4389, -0.3660, -0.4224, -0.3597, -0.1956, -0.3741,
        -0.3933, -0.4807, -0.3210, -0.2076, -0.3727, -0.4372, -0.2812, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729])
arg max of tensor([-0.3412, -0.4224, -0.4532, -0.3757, -0.2567, -0.2770, -0.4074, -0.2076,
        -0.4058, -0.2218, -0.4744, -

Tokens embeddings:
tensor([-0.3412, -0.4224, -0.4532, -0.3757, -0.2567, -0.2770, -0.4074, -0.2076,
        -0.4058, -0.2218, -0.4744, -0.2399, -0.3865, -0.3218, -0.4119, -0.3013,
        -0.4646, -0.3673, -0.4357, -0.4727, -0.4357, -0.4215, -0.4093, -0.3432,
        -0.3747, -0.2076, -0.4389, -0.3660, -0.4224, -0.3597, -0.1956, -0.3741,
        -0.3933, -0.4807, -0.3210, -0.2076, -0.3727, -0.4372, -0.2812, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729])
arg max of tensor([-0.3412, -0.4224, -0.4532, -0.3757, -0.2567, -0.2770, -0.4074, -0.2076,
        -0.4058, -0.2218, -0.4744, -

Tokens embeddings:
tensor([-0.3412, -0.4224, -0.4532, -0.3757, -0.2567, -0.2770, -0.4074, -0.2076,
        -0.4058, -0.2218, -0.4744, -0.2399, -0.3865, -0.3218, -0.4119, -0.3013,
        -0.4646, -0.3673, -0.4357, -0.4727, -0.4357, -0.4215, -0.4093, -0.3432,
        -0.3747, -0.2076, -0.4389, -0.3660, -0.4224, -0.3597, -0.1956, -0.3741,
        -0.3933, -0.4807, -0.3210, -0.2076, -0.3727, -0.4372, -0.2812, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729])
arg max of tensor([-0.3412, -0.4224, -0.4532, -0.3757, -0.2567, -0.2770, -0.4074, -0.2076,
        -0.4058, -0.2218, -0.4744, -

Tokens embeddings:
tensor([-0.3412, -0.4224, -0.4532, -0.3757, -0.2567, -0.2770, -0.4074, -0.2076,
        -0.4058, -0.2218, -0.4744, -0.2399, -0.3865, -0.3218, -0.4119, -0.3013,
        -0.4646, -0.3673, -0.4357, -0.4727, -0.4357, -0.4215, -0.4093, -0.3432,
        -0.3747, -0.2076, -0.4389, -0.3660, -0.4224, -0.3597, -0.1956, -0.3741,
        -0.3933, -0.4807, -0.3210, -0.2076, -0.3727, -0.4372, -0.2812, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729])
arg max of tensor([-0.3412, -0.4224, -0.4532, -0.3757, -0.2567, -0.2770, -0.4074, -0.2076,
        -0.4058, -0.2218, -0.4744, -

Tokens embeddings:
tensor([-0.3412, -0.4224, -0.4532, -0.3757, -0.2567, -0.2770, -0.4074, -0.2076,
        -0.4058, -0.2218, -0.4744, -0.2399, -0.3865, -0.3218, -0.4119, -0.3013,
        -0.4646, -0.3673, -0.4357, -0.4727, -0.4357, -0.4215, -0.4093, -0.3432,
        -0.3747, -0.2076, -0.4389, -0.3660, -0.4224, -0.3597, -0.1956, -0.3741,
        -0.3933, -0.4807, -0.3210, -0.2076, -0.3727, -0.4372, -0.2812, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729])
arg max of tensor([-0.3412, -0.4224, -0.4532, -0.3757, -0.2567, -0.2770, -0.4074, -0.2076,
        -0.4058, -0.2218, -0.4744, -

Tokens embeddings:
tensor([-0.3412, -0.3786, -0.5149, -0.4309, -0.4581, -0.2881, -0.1997, -0.1737,
        -0.4079, -0.2076, -0.3649, -0.2723, -0.4395, -0.4581, -0.4835, -0.4550,
        -0.4224, -0.3765, -0.1870, -0.2602, -0.2765, -0.3484, -0.1904, -0.4200,
        -0.4556, -0.2076, -0.4591, -0.2820, -0.6072, -0.3979, -0.4542, -0.4119,
        -0.4338, -0.4467, -0.5104, -0.4312, -0.3432, -0.2812, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729])
arg max of tensor([-0.3412, -0.3786, -0.5149, -0.4309, -0.4581, -0.2881, -0.1997, -0.1737,
        -0.4079, -0.2076, -0.3649, -

Tokens embeddings:
tensor([-0.3412, -0.3786, -0.5149, -0.4309, -0.4581, -0.2881, -0.1997, -0.1737,
        -0.4079, -0.2076, -0.3649, -0.2723, -0.4395, -0.4581, -0.4835, -0.4550,
        -0.4224, -0.3765, -0.1870, -0.2602, -0.2765, -0.3484, -0.1904, -0.4200,
        -0.4556, -0.2076, -0.4591, -0.2820, -0.6072, -0.3979, -0.4542, -0.4119,
        -0.4338, -0.4467, -0.5104, -0.4312, -0.3432, -0.2812, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729])
arg max of tensor([-0.3412, -0.3786, -0.5149, -0.4309, -0.4581, -0.2881, -0.1997, -0.1737,
        -0.4079, -0.2076, -0.3649, -

Tokens embeddings:
tensor([-0.3412, -0.3786, -0.5149, -0.4309, -0.4581, -0.2881, -0.1997, -0.1737,
        -0.4079, -0.2076, -0.3649, -0.2723, -0.4395, -0.4581, -0.4835, -0.4550,
        -0.4224, -0.3765, -0.1870, -0.2602, -0.2765, -0.3484, -0.1904, -0.4200,
        -0.4556, -0.2076, -0.4591, -0.2820, -0.6072, -0.3979, -0.4542, -0.4119,
        -0.4338, -0.4467, -0.5104, -0.4312, -0.3432, -0.2812, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729])
arg max of tensor([-0.3412, -0.3786, -0.5149, -0.4309, -0.4581, -0.2881, -0.1997, -0.1737,
        -0.4079, -0.2076, -0.3649, -

Tokens embeddings:
tensor([-0.3412, -0.3786, -0.5149, -0.4309, -0.4581, -0.2881, -0.1997, -0.1737,
        -0.4079, -0.2076, -0.3649, -0.2723, -0.4395, -0.4581, -0.4835, -0.4550,
        -0.4224, -0.3765, -0.1870, -0.2602, -0.2765, -0.3484, -0.1904, -0.4200,
        -0.4556, -0.2076, -0.4591, -0.2820, -0.6072, -0.3979, -0.4542, -0.4119,
        -0.4338, -0.4467, -0.5104, -0.4312, -0.3432, -0.2812, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729])
arg max of tensor([-0.3412, -0.3786, -0.5149, -0.4309, -0.4581, -0.2881, -0.1997, -0.1737,
        -0.4079, -0.2076, -0.3649, -

Tokens embeddings:
tensor([-0.3412, -0.3786, -0.5149, -0.4309, -0.4581, -0.2881, -0.1997, -0.1737,
        -0.4079, -0.2076, -0.3649, -0.2723, -0.4395, -0.4581, -0.4835, -0.4550,
        -0.4224, -0.3765, -0.1870, -0.2602, -0.2765, -0.3484, -0.1904, -0.4200,
        -0.4556, -0.2076, -0.4591, -0.2820, -0.6072, -0.3979, -0.4542, -0.4119,
        -0.4338, -0.4467, -0.5104, -0.4312, -0.3432, -0.2812, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729])
arg max of tensor([-0.3412, -0.3786, -0.5149, -0.4309, -0.4581, -0.2881, -0.1997, -0.1737,
        -0.4079, -0.2076, -0.3649, -

Tokens embeddings:
tensor([-0.3412, -0.3786, -0.5149, -0.4309, -0.4581, -0.2881, -0.1997, -0.1737,
        -0.4079, -0.2076, -0.3649, -0.2723, -0.4395, -0.4581, -0.4835, -0.4550,
        -0.4224, -0.3765, -0.1870, -0.2602, -0.2765, -0.3484, -0.1904, -0.4200,
        -0.4556, -0.2076, -0.4591, -0.2820, -0.6072, -0.3979, -0.4542, -0.4119,
        -0.4338, -0.4467, -0.5104, -0.4312, -0.3432, -0.2812, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729])
arg max of tensor([-0.3412, -0.3786, -0.5149, -0.4309, -0.4581, -0.2881, -0.1997, -0.1737,
        -0.4079, -0.2076, -0.3649, -

Tokens embeddings:
tensor([-0.3412, -0.3786, -0.5149, -0.4309, -0.4581, -0.2881, -0.1997, -0.1737,
        -0.4079, -0.2076, -0.3649, -0.2723, -0.4395, -0.4581, -0.4835, -0.4550,
        -0.4224, -0.3765, -0.1870, -0.2602, -0.2765, -0.3484, -0.1904, -0.4200,
        -0.4556, -0.2076, -0.4591, -0.2820, -0.6072, -0.3979, -0.4542, -0.4119,
        -0.4338, -0.4467, -0.5104, -0.4312, -0.3432, -0.2812, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729])
arg max of tensor([-0.3412, -0.3786, -0.5149, -0.4309, -0.4581, -0.2881, -0.1997, -0.1737,
        -0.4079, -0.2076, -0.3649, -

Tokens embeddings:
tensor([-0.3412, -0.3786, -0.5149, -0.4309, -0.4581, -0.2881, -0.1997, -0.1737,
        -0.4079, -0.2076, -0.3649, -0.2723, -0.4395, -0.4581, -0.4835, -0.4550,
        -0.4224, -0.3765, -0.1870, -0.2602, -0.2765, -0.3484, -0.1904, -0.4200,
        -0.4556, -0.2076, -0.4591, -0.2820, -0.6072, -0.3979, -0.4542, -0.4119,
        -0.4338, -0.4467, -0.5104, -0.4312, -0.3432, -0.2812, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729])
arg max of tensor([-0.3412, -0.3786, -0.5149, -0.4309, -0.4581, -0.2881, -0.1997, -0.1737,
        -0.4079, -0.2076, -0.3649, -

Tokens embeddings:
tensor([-0.3412, -0.3786, -0.5149, -0.4309, -0.4581, -0.2881, -0.1997, -0.1737,
        -0.4079, -0.2076, -0.3649, -0.2723, -0.4395, -0.4581, -0.4835, -0.4550,
        -0.4224, -0.3765, -0.1870, -0.2602, -0.2765, -0.3484, -0.1904, -0.4200,
        -0.4556, -0.2076, -0.4591, -0.2820, -0.6072, -0.3979, -0.4542, -0.4119,
        -0.4338, -0.4467, -0.5104, -0.4312, -0.3432, -0.2812, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729])
arg max of tensor([-0.3412, -0.3786, -0.5149, -0.4309, -0.4581, -0.2881, -0.1997, -0.1737,
        -0.4079, -0.2076, -0.3649, -

Tokens embeddings:
tensor([-0.3412, -0.3786, -0.5149, -0.4309, -0.4581, -0.2881, -0.1997, -0.1737,
        -0.4079, -0.2076, -0.3649, -0.2723, -0.4395, -0.4581, -0.4835, -0.4550,
        -0.4224, -0.3765, -0.1870, -0.2602, -0.2765, -0.3484, -0.1904, -0.4200,
        -0.4556, -0.2076, -0.4591, -0.2820, -0.6072, -0.3979, -0.4542, -0.4119,
        -0.4338, -0.4467, -0.5104, -0.4312, -0.3432, -0.2812, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729])
arg max of tensor([-0.3412, -0.3786, -0.5149, -0.4309, -0.4581, -0.2881, -0.1997, -0.1737,
        -0.4079, -0.2076, -0.3649, -

Tokens embeddings:
tensor([-0.3412, -0.3786, -0.5149, -0.4309, -0.4581, -0.2881, -0.1997, -0.1737,
        -0.4079, -0.2076, -0.3649, -0.2723, -0.4395, -0.4581, -0.4835, -0.4550,
        -0.4224, -0.3765, -0.1870, -0.2602, -0.2765, -0.3484, -0.1904, -0.4200,
        -0.4556, -0.2076, -0.4591, -0.2820, -0.6072, -0.3979, -0.4542, -0.4119,
        -0.4338, -0.4467, -0.5104, -0.4312, -0.3432, -0.2812, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729])
arg max of tensor([-0.3412, -0.3786, -0.5149, -0.4309, -0.4581, -0.2881, -0.1997, -0.1737,
        -0.4079, -0.2076, -0.3649, -

Tokens embeddings:
tensor([-0.3412, -0.3786, -0.5149, -0.4309, -0.4581, -0.2881, -0.1997, -0.1737,
        -0.4079, -0.2076, -0.3649, -0.2723, -0.4395, -0.4581, -0.4835, -0.4550,
        -0.4224, -0.3765, -0.1870, -0.2602, -0.2765, -0.3484, -0.1904, -0.4200,
        -0.4556, -0.2076, -0.4591, -0.2820, -0.6072, -0.3979, -0.4542, -0.4119,
        -0.4338, -0.4467, -0.5104, -0.4312, -0.3432, -0.2812, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729])
arg max of tensor([-0.3412, -0.3786, -0.5149, -0.4309, -0.4581, -0.2881, -0.1997, -0.1737,
        -0.4079, -0.2076, -0.3649, -

Tokens embeddings:
tensor([-0.3412, -0.2209, -0.2723, -0.2978, -0.4744, -0.3401, -0.4287, -0.4224,
        -0.3701, -0.3226, -0.4119, -0.4119, -0.4724, -0.4715, -0.4550, -0.4422,
        -0.4477, -0.5039, -0.3573, -0.4513, -0.3308, -0.0990, -0.4119, -0.2812,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729])
arg max of tensor([-0.3412, -0.2209, -0.2723, -0.2978, -0.4744, -0.3401, -0.4287, -0.4224,
        -0.3701, -0.3226, -0.4119, -

Tokens embeddings:
tensor([-0.3412, -0.2209, -0.2723, -0.2978, -0.4744, -0.3401, -0.4287, -0.4224,
        -0.3701, -0.3226, -0.4119, -0.4119, -0.4724, -0.4715, -0.4550, -0.4422,
        -0.4477, -0.5039, -0.3573, -0.4513, -0.3308, -0.0990, -0.4119, -0.2812,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729])
arg max of tensor([-0.3412, -0.2209, -0.2723, -0.2978, -0.4744, -0.3401, -0.4287, -0.4224,
        -0.3701, -0.3226, -0.4119, -

Tokens embeddings:
tensor([-0.3412, -0.2209, -0.2723, -0.2978, -0.4744, -0.3401, -0.4287, -0.4224,
        -0.3701, -0.3226, -0.4119, -0.4119, -0.4724, -0.4715, -0.4550, -0.4422,
        -0.4477, -0.5039, -0.3573, -0.4513, -0.3308, -0.0990, -0.4119, -0.2812,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729])
arg max of tensor([-0.3412, -0.2209, -0.2723, -0.2978, -0.4744, -0.3401, -0.4287, -0.4224,
        -0.3701, -0.3226, -0.4119, -

Tokens embeddings:
tensor([-0.3412, -0.2209, -0.2723, -0.2978, -0.4744, -0.3401, -0.4287, -0.4224,
        -0.3701, -0.3226, -0.4119, -0.4119, -0.4724, -0.4715, -0.4550, -0.4422,
        -0.4477, -0.5039, -0.3573, -0.4513, -0.3308, -0.0990, -0.4119, -0.2812,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729])
arg max of tensor([-0.3412, -0.2209, -0.2723, -0.2978, -0.4744, -0.3401, -0.4287, -0.4224,
        -0.3701, -0.3226, -0.4119, -

Tokens embeddings:
tensor([-0.3412, -0.2209, -0.2723, -0.2978, -0.4744, -0.3401, -0.4287, -0.4224,
        -0.3701, -0.3226, -0.4119, -0.4119, -0.4724, -0.4715, -0.4550, -0.4422,
        -0.4477, -0.5039, -0.3573, -0.4513, -0.3308, -0.0990, -0.4119, -0.2812,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729])
arg max of tensor([-0.3412, -0.2209, -0.2723, -0.2978, -0.4744, -0.3401, -0.4287, -0.4224,
        -0.3701, -0.3226, -0.4119, -

Tokens embeddings:
tensor([-0.3412, -0.2209, -0.2723, -0.2978, -0.4744, -0.3401, -0.4287, -0.4224,
        -0.3701, -0.3226, -0.4119, -0.4119, -0.4724, -0.4715, -0.4550, -0.4422,
        -0.4477, -0.5039, -0.3573, -0.4513, -0.3308, -0.0990, -0.4119, -0.2812,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729])
arg max of tensor([-0.3412, -0.2209, -0.2723, -0.2978, -0.4744, -0.3401, -0.4287, -0.4224,
        -0.3701, -0.3226, -0.4119, -

Tokens embeddings:
tensor([-0.3412, -0.2209, -0.2723, -0.2978, -0.4744, -0.3401, -0.4287, -0.4224,
        -0.3701, -0.3226, -0.4119, -0.4119, -0.4724, -0.4715, -0.4550, -0.4422,
        -0.4477, -0.5039, -0.3573, -0.4513, -0.3308, -0.0990, -0.4119, -0.2812,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729])
arg max of tensor([-0.3412, -0.2209, -0.2723, -0.2978, -0.4744, -0.3401, -0.4287, -0.4224,
        -0.3701, -0.3226, -0.4119, -

Tokens embeddings:
tensor([-0.3412, -0.2209, -0.2723, -0.2978, -0.4744, -0.3401, -0.4287, -0.4224,
        -0.3701, -0.3226, -0.4119, -0.4119, -0.4724, -0.4715, -0.4550, -0.4422,
        -0.4477, -0.5039, -0.3573, -0.4513, -0.3308, -0.0990, -0.4119, -0.2812,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729])
arg max of tensor([-0.3412, -0.2209, -0.2723, -0.2978, -0.4744, -0.3401, -0.4287, -0.4224,
        -0.3701, -0.3226, -0.4119, -

Tokens embeddings:
tensor([-0.3412, -0.2209, -0.2723, -0.2978, -0.4744, -0.3401, -0.4287, -0.4224,
        -0.3701, -0.3226, -0.4119, -0.4119, -0.4724, -0.4715, -0.4550, -0.4422,
        -0.4477, -0.5039, -0.3573, -0.4513, -0.3308, -0.0990, -0.4119, -0.2812,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729])
arg max of tensor([-0.3412, -0.2209, -0.2723, -0.2978, -0.4744, -0.3401, -0.4287, -0.4224,
        -0.3701, -0.3226, -0.4119, -

Tokens embeddings:
tensor([-0.3412, -0.2209, -0.2723, -0.2978, -0.4744, -0.3401, -0.4287, -0.4224,
        -0.3701, -0.3226, -0.4119, -0.4119, -0.4724, -0.4715, -0.4550, -0.4422,
        -0.4477, -0.5039, -0.3573, -0.4513, -0.3308, -0.0990, -0.4119, -0.2812,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729])
arg max of tensor([-0.3412, -0.2209, -0.2723, -0.2978, -0.4744, -0.3401, -0.4287, -0.4224,
        -0.3701, -0.3226, -0.4119, -

Tokens embeddings:
tensor([-0.3412, -0.2209, -0.2723, -0.2978, -0.4744, -0.3401, -0.4287, -0.4224,
        -0.3701, -0.3226, -0.4119, -0.4119, -0.4724, -0.4715, -0.4550, -0.4422,
        -0.4477, -0.5039, -0.3573, -0.4513, -0.3308, -0.0990, -0.4119, -0.2812,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729])
arg max of tensor([-0.3412, -0.2209, -0.2723, -0.2978, -0.4744, -0.3401, -0.4287, -0.4224,
        -0.3701, -0.3226, -0.4119, -

Tokens embeddings:
tensor([-0.3412, -0.2209, -0.2723, -0.2978, -0.4744, -0.3401, -0.4287, -0.4224,
        -0.3701, -0.3226, -0.4119, -0.4119, -0.4724, -0.4715, -0.4550, -0.4422,
        -0.4477, -0.5039, -0.3573, -0.4513, -0.3308, -0.0990, -0.4119, -0.2812,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729, -0.3729,
        -0.3729, -0.3729, -0.3729, -0.3729, -0.3729])
arg max of tensor([-0.3412, -0.2209, -0.2723, -0.2978, -0.4744, -0.3401, -0.4287, -0.4224,
        -0.3701, -0.3226, -0.4119, -

In [None]:
# Max Pooling for all tokens in sentences and argmax

# Select some sentences randomly
sent_index = [11, 12, 13, 14, 15, 16, 17, 18, 19, 20]

for s in sent_index:
  print("*" * 100)
  # Get the sentence's words
  tokens = test_inputs[s]
  # For each layer...
  for l in range(12):
    print("\nLayer", l+1)
    attention = np.squeeze(test_attentions[s][l].detach().cpu().numpy(), axis=0)
    # and for each head
    #for h, head in enumerate(attention):
    #print("Head", h+1)
    # Get the attention for the cls token
    encoded_tokens = bert_tokenizer(tokens, padding=True, truncation=True, max_length=128, return_tensors='pt')
    encoded_tokens = encoded_tokens.to(device)
    with torch.no_grad():
      model_output1 = model_e(**encoded_tokens)
      tokens_embeddings = max_pooling(model_output1, encoded_tokens['attention_mask'])
      tokens_embeddings = tokens_embeddings.cpu()
      cls_attentions = head[0]
      display(HTML(colorize(tokens, cls_attentions)))
      print("Tokens embeddings:")
      print(tokens_embeddings)
      arg = argmax(tokens_embeddings)
      print('arg max of %s: %d' % (tokens_embeddings, arg))

****************************************************************************************************

Layer 1


Tokens embeddings:
tensor([-1.0233e-02, -3.7405e-01, -1.0000e+09, -1.6655e-01, -4.7442e-01,
        -1.0000e+09, -2.8151e-01, -4.2241e-01, -1.0000e+09, -9.0318e-02,
        -4.7442e-01, -1.0000e+09, -1.5565e-01, -4.3384e-01, -1.0000e+09,
        -3.6203e-01, -4.2606e-01, -1.0000e+09, -3.5217e-01, -1.9592e-01,
        -1.0000e+09, -2.8116e-01, -3.0724e-01, -1.0000e+09, -2.9739e-01,
        -4.0092e-01, -1.0000e+09, -3.5217e-01, -1.3561e-01, -1.0000e+09,
        -3.5217e-01, -4.4164e-01, -1.0000e+09, -3.2856e-01, -3.2176e-01,
        -1.0000e+09, -1.7591e-01, -2.7788e-01, -1.0000e+09, -5.3662e-02,
        -3.6822e-01, -1.0000e+09, -2.1012e-01, -3.3107e-01, -1.0000e+09,
        -2.8299e-01, -5.1984e-01, -1.0000e+09, -2.9988e-01, -3.4121e-01,
        -1.0000e+09,  1.5462e-02, -3.7285e-01, -1.0000e+09, -1.0184e-02,
        -3.7285e-01, -1.0000e+09, -1.0184e-02, -3.7285e-01, -1.0000e+09,
        -1.0184e-02, -3.7285e-01, -1.0000e+09, -1.0184e-02, -3.7285e-01,
        -1.0000e+09, -1.0184e-02

Tokens embeddings:
tensor([-1.0233e-02, -3.7405e-01, -1.0000e+09, -1.6655e-01, -4.7442e-01,
        -1.0000e+09, -2.8151e-01, -4.2241e-01, -1.0000e+09, -9.0318e-02,
        -4.7442e-01, -1.0000e+09, -1.5565e-01, -4.3384e-01, -1.0000e+09,
        -3.6203e-01, -4.2606e-01, -1.0000e+09, -3.5217e-01, -1.9592e-01,
        -1.0000e+09, -2.8116e-01, -3.0724e-01, -1.0000e+09, -2.9739e-01,
        -4.0092e-01, -1.0000e+09, -3.5217e-01, -1.3561e-01, -1.0000e+09,
        -3.5217e-01, -4.4164e-01, -1.0000e+09, -3.2856e-01, -3.2176e-01,
        -1.0000e+09, -1.7591e-01, -2.7788e-01, -1.0000e+09, -5.3662e-02,
        -3.6822e-01, -1.0000e+09, -2.1012e-01, -3.3107e-01, -1.0000e+09,
        -2.8299e-01, -5.1984e-01, -1.0000e+09, -2.9988e-01, -3.4121e-01,
        -1.0000e+09,  1.5462e-02, -3.7285e-01, -1.0000e+09, -1.0184e-02,
        -3.7285e-01, -1.0000e+09, -1.0184e-02, -3.7285e-01, -1.0000e+09,
        -1.0184e-02, -3.7285e-01, -1.0000e+09, -1.0184e-02, -3.7285e-01,
        -1.0000e+09, -1.0184e-02

Tokens embeddings:
tensor([-1.0233e-02, -3.7405e-01, -1.0000e+09, -1.6655e-01, -4.7442e-01,
        -1.0000e+09, -2.8151e-01, -4.2241e-01, -1.0000e+09, -9.0318e-02,
        -4.7442e-01, -1.0000e+09, -1.5565e-01, -4.3384e-01, -1.0000e+09,
        -3.6203e-01, -4.2606e-01, -1.0000e+09, -3.5217e-01, -1.9592e-01,
        -1.0000e+09, -2.8116e-01, -3.0724e-01, -1.0000e+09, -2.9739e-01,
        -4.0092e-01, -1.0000e+09, -3.5217e-01, -1.3561e-01, -1.0000e+09,
        -3.5217e-01, -4.4164e-01, -1.0000e+09, -3.2856e-01, -3.2176e-01,
        -1.0000e+09, -1.7591e-01, -2.7788e-01, -1.0000e+09, -5.3662e-02,
        -3.6822e-01, -1.0000e+09, -2.1012e-01, -3.3107e-01, -1.0000e+09,
        -2.8299e-01, -5.1984e-01, -1.0000e+09, -2.9988e-01, -3.4121e-01,
        -1.0000e+09,  1.5462e-02, -3.7285e-01, -1.0000e+09, -1.0184e-02,
        -3.7285e-01, -1.0000e+09, -1.0184e-02, -3.7285e-01, -1.0000e+09,
        -1.0184e-02, -3.7285e-01, -1.0000e+09, -1.0184e-02, -3.7285e-01,
        -1.0000e+09, -1.0184e-02

Tokens embeddings:
tensor([-1.0233e-02, -3.7405e-01, -1.0000e+09, -1.6655e-01, -4.7442e-01,
        -1.0000e+09, -2.8151e-01, -4.2241e-01, -1.0000e+09, -9.0318e-02,
        -4.7442e-01, -1.0000e+09, -1.5565e-01, -4.3384e-01, -1.0000e+09,
        -3.6203e-01, -4.2606e-01, -1.0000e+09, -3.5217e-01, -1.9592e-01,
        -1.0000e+09, -2.8116e-01, -3.0724e-01, -1.0000e+09, -2.9739e-01,
        -4.0092e-01, -1.0000e+09, -3.5217e-01, -1.3561e-01, -1.0000e+09,
        -3.5217e-01, -4.4164e-01, -1.0000e+09, -3.2856e-01, -3.2176e-01,
        -1.0000e+09, -1.7591e-01, -2.7788e-01, -1.0000e+09, -5.3662e-02,
        -3.6822e-01, -1.0000e+09, -2.1012e-01, -3.3107e-01, -1.0000e+09,
        -2.8299e-01, -5.1984e-01, -1.0000e+09, -2.9988e-01, -3.4121e-01,
        -1.0000e+09,  1.5462e-02, -3.7285e-01, -1.0000e+09, -1.0184e-02,
        -3.7285e-01, -1.0000e+09, -1.0184e-02, -3.7285e-01, -1.0000e+09,
        -1.0184e-02, -3.7285e-01, -1.0000e+09, -1.0184e-02, -3.7285e-01,
        -1.0000e+09, -1.0184e-02

Tokens embeddings:
tensor([-1.0233e-02, -3.7405e-01, -1.0000e+09, -1.6655e-01, -4.7442e-01,
        -1.0000e+09, -2.8151e-01, -4.2241e-01, -1.0000e+09, -9.0318e-02,
        -4.7442e-01, -1.0000e+09, -1.5565e-01, -4.3384e-01, -1.0000e+09,
        -3.6203e-01, -4.2606e-01, -1.0000e+09, -3.5217e-01, -1.9592e-01,
        -1.0000e+09, -2.8116e-01, -3.0724e-01, -1.0000e+09, -2.9739e-01,
        -4.0092e-01, -1.0000e+09, -3.5217e-01, -1.3561e-01, -1.0000e+09,
        -3.5217e-01, -4.4164e-01, -1.0000e+09, -3.2856e-01, -3.2176e-01,
        -1.0000e+09, -1.7591e-01, -2.7788e-01, -1.0000e+09, -5.3662e-02,
        -3.6822e-01, -1.0000e+09, -2.1012e-01, -3.3107e-01, -1.0000e+09,
        -2.8299e-01, -5.1984e-01, -1.0000e+09, -2.9988e-01, -3.4121e-01,
        -1.0000e+09,  1.5462e-02, -3.7285e-01, -1.0000e+09, -1.0184e-02,
        -3.7285e-01, -1.0000e+09, -1.0184e-02, -3.7285e-01, -1.0000e+09,
        -1.0184e-02, -3.7285e-01, -1.0000e+09, -1.0184e-02, -3.7285e-01,
        -1.0000e+09, -1.0184e-02

Tokens embeddings:
tensor([-1.0233e-02, -3.7405e-01, -1.0000e+09, -1.6655e-01, -4.7442e-01,
        -1.0000e+09, -2.8151e-01, -4.2241e-01, -1.0000e+09, -9.0318e-02,
        -4.7442e-01, -1.0000e+09, -1.5565e-01, -4.3384e-01, -1.0000e+09,
        -3.6203e-01, -4.2606e-01, -1.0000e+09, -3.5217e-01, -1.9592e-01,
        -1.0000e+09, -2.8116e-01, -3.0724e-01, -1.0000e+09, -2.9739e-01,
        -4.0092e-01, -1.0000e+09, -3.5217e-01, -1.3561e-01, -1.0000e+09,
        -3.5217e-01, -4.4164e-01, -1.0000e+09, -3.2856e-01, -3.2176e-01,
        -1.0000e+09, -1.7591e-01, -2.7788e-01, -1.0000e+09, -5.3662e-02,
        -3.6822e-01, -1.0000e+09, -2.1012e-01, -3.3107e-01, -1.0000e+09,
        -2.8299e-01, -5.1984e-01, -1.0000e+09, -2.9988e-01, -3.4121e-01,
        -1.0000e+09,  1.5462e-02, -3.7285e-01, -1.0000e+09, -1.0184e-02,
        -3.7285e-01, -1.0000e+09, -1.0184e-02, -3.7285e-01, -1.0000e+09,
        -1.0184e-02, -3.7285e-01, -1.0000e+09, -1.0184e-02, -3.7285e-01,
        -1.0000e+09, -1.0184e-02

Tokens embeddings:
tensor([-1.0233e-02, -3.7405e-01, -1.0000e+09, -1.6655e-01, -4.7442e-01,
        -1.0000e+09, -2.8151e-01, -4.2241e-01, -1.0000e+09, -9.0318e-02,
        -4.7442e-01, -1.0000e+09, -1.5565e-01, -4.3384e-01, -1.0000e+09,
        -3.6203e-01, -4.2606e-01, -1.0000e+09, -3.5217e-01, -1.9592e-01,
        -1.0000e+09, -2.8116e-01, -3.0724e-01, -1.0000e+09, -2.9739e-01,
        -4.0092e-01, -1.0000e+09, -3.5217e-01, -1.3561e-01, -1.0000e+09,
        -3.5217e-01, -4.4164e-01, -1.0000e+09, -3.2856e-01, -3.2176e-01,
        -1.0000e+09, -1.7591e-01, -2.7788e-01, -1.0000e+09, -5.3662e-02,
        -3.6822e-01, -1.0000e+09, -2.1012e-01, -3.3107e-01, -1.0000e+09,
        -2.8299e-01, -5.1984e-01, -1.0000e+09, -2.9988e-01, -3.4121e-01,
        -1.0000e+09,  1.5462e-02, -3.7285e-01, -1.0000e+09, -1.0184e-02,
        -3.7285e-01, -1.0000e+09, -1.0184e-02, -3.7285e-01, -1.0000e+09,
        -1.0184e-02, -3.7285e-01, -1.0000e+09, -1.0184e-02, -3.7285e-01,
        -1.0000e+09, -1.0184e-02

Tokens embeddings:
tensor([-1.0233e-02, -3.7405e-01, -1.0000e+09, -1.6655e-01, -4.7442e-01,
        -1.0000e+09, -2.8151e-01, -4.2241e-01, -1.0000e+09, -9.0318e-02,
        -4.7442e-01, -1.0000e+09, -1.5565e-01, -4.3384e-01, -1.0000e+09,
        -3.6203e-01, -4.2606e-01, -1.0000e+09, -3.5217e-01, -1.9592e-01,
        -1.0000e+09, -2.8116e-01, -3.0724e-01, -1.0000e+09, -2.9739e-01,
        -4.0092e-01, -1.0000e+09, -3.5217e-01, -1.3561e-01, -1.0000e+09,
        -3.5217e-01, -4.4164e-01, -1.0000e+09, -3.2856e-01, -3.2176e-01,
        -1.0000e+09, -1.7591e-01, -2.7788e-01, -1.0000e+09, -5.3662e-02,
        -3.6822e-01, -1.0000e+09, -2.1012e-01, -3.3107e-01, -1.0000e+09,
        -2.8299e-01, -5.1984e-01, -1.0000e+09, -2.9988e-01, -3.4121e-01,
        -1.0000e+09,  1.5462e-02, -3.7285e-01, -1.0000e+09, -1.0184e-02,
        -3.7285e-01, -1.0000e+09, -1.0184e-02, -3.7285e-01, -1.0000e+09,
        -1.0184e-02, -3.7285e-01, -1.0000e+09, -1.0184e-02, -3.7285e-01,
        -1.0000e+09, -1.0184e-02

Tokens embeddings:
tensor([-1.0233e-02, -3.7405e-01, -1.0000e+09, -1.6655e-01, -4.7442e-01,
        -1.0000e+09, -2.8151e-01, -4.2241e-01, -1.0000e+09, -9.0318e-02,
        -4.7442e-01, -1.0000e+09, -1.5565e-01, -4.3384e-01, -1.0000e+09,
        -3.6203e-01, -4.2606e-01, -1.0000e+09, -3.5217e-01, -1.9592e-01,
        -1.0000e+09, -2.8116e-01, -3.0724e-01, -1.0000e+09, -2.9739e-01,
        -4.0092e-01, -1.0000e+09, -3.5217e-01, -1.3561e-01, -1.0000e+09,
        -3.5217e-01, -4.4164e-01, -1.0000e+09, -3.2856e-01, -3.2176e-01,
        -1.0000e+09, -1.7591e-01, -2.7788e-01, -1.0000e+09, -5.3662e-02,
        -3.6822e-01, -1.0000e+09, -2.1012e-01, -3.3107e-01, -1.0000e+09,
        -2.8299e-01, -5.1984e-01, -1.0000e+09, -2.9988e-01, -3.4121e-01,
        -1.0000e+09,  1.5462e-02, -3.7285e-01, -1.0000e+09, -1.0184e-02,
        -3.7285e-01, -1.0000e+09, -1.0184e-02, -3.7285e-01, -1.0000e+09,
        -1.0184e-02, -3.7285e-01, -1.0000e+09, -1.0184e-02, -3.7285e-01,
        -1.0000e+09, -1.0184e-02

Tokens embeddings:
tensor([-1.0233e-02, -3.7405e-01, -1.0000e+09, -1.6655e-01, -4.7442e-01,
        -1.0000e+09, -2.8151e-01, -4.2241e-01, -1.0000e+09, -9.0318e-02,
        -4.7442e-01, -1.0000e+09, -1.5565e-01, -4.3384e-01, -1.0000e+09,
        -3.6203e-01, -4.2606e-01, -1.0000e+09, -3.5217e-01, -1.9592e-01,
        -1.0000e+09, -2.8116e-01, -3.0724e-01, -1.0000e+09, -2.9739e-01,
        -4.0092e-01, -1.0000e+09, -3.5217e-01, -1.3561e-01, -1.0000e+09,
        -3.5217e-01, -4.4164e-01, -1.0000e+09, -3.2856e-01, -3.2176e-01,
        -1.0000e+09, -1.7591e-01, -2.7788e-01, -1.0000e+09, -5.3662e-02,
        -3.6822e-01, -1.0000e+09, -2.1012e-01, -3.3107e-01, -1.0000e+09,
        -2.8299e-01, -5.1984e-01, -1.0000e+09, -2.9988e-01, -3.4121e-01,
        -1.0000e+09,  1.5462e-02, -3.7285e-01, -1.0000e+09, -1.0184e-02,
        -3.7285e-01, -1.0000e+09, -1.0184e-02, -3.7285e-01, -1.0000e+09,
        -1.0184e-02, -3.7285e-01, -1.0000e+09, -1.0184e-02, -3.7285e-01,
        -1.0000e+09, -1.0184e-02

Tokens embeddings:
tensor([-1.0233e-02, -3.7405e-01, -1.0000e+09, -1.6655e-01, -4.7442e-01,
        -1.0000e+09, -2.8151e-01, -4.2241e-01, -1.0000e+09, -9.0318e-02,
        -4.7442e-01, -1.0000e+09, -1.5565e-01, -4.3384e-01, -1.0000e+09,
        -3.6203e-01, -4.2606e-01, -1.0000e+09, -3.5217e-01, -1.9592e-01,
        -1.0000e+09, -2.8116e-01, -3.0724e-01, -1.0000e+09, -2.9739e-01,
        -4.0092e-01, -1.0000e+09, -3.5217e-01, -1.3561e-01, -1.0000e+09,
        -3.5217e-01, -4.4164e-01, -1.0000e+09, -3.2856e-01, -3.2176e-01,
        -1.0000e+09, -1.7591e-01, -2.7788e-01, -1.0000e+09, -5.3662e-02,
        -3.6822e-01, -1.0000e+09, -2.1012e-01, -3.3107e-01, -1.0000e+09,
        -2.8299e-01, -5.1984e-01, -1.0000e+09, -2.9988e-01, -3.4121e-01,
        -1.0000e+09,  1.5462e-02, -3.7285e-01, -1.0000e+09, -1.0184e-02,
        -3.7285e-01, -1.0000e+09, -1.0184e-02, -3.7285e-01, -1.0000e+09,
        -1.0184e-02, -3.7285e-01, -1.0000e+09, -1.0184e-02, -3.7285e-01,
        -1.0000e+09, -1.0184e-02

Tokens embeddings:
tensor([-1.0233e-02, -3.7405e-01, -1.0000e+09, -1.6655e-01, -4.7442e-01,
        -1.0000e+09, -2.8151e-01, -4.2241e-01, -1.0000e+09, -9.0318e-02,
        -4.7442e-01, -1.0000e+09, -1.5565e-01, -4.3384e-01, -1.0000e+09,
        -3.6203e-01, -4.2606e-01, -1.0000e+09, -3.5217e-01, -1.9592e-01,
        -1.0000e+09, -2.8116e-01, -3.0724e-01, -1.0000e+09, -2.9739e-01,
        -4.0092e-01, -1.0000e+09, -3.5217e-01, -1.3561e-01, -1.0000e+09,
        -3.5217e-01, -4.4164e-01, -1.0000e+09, -3.2856e-01, -3.2176e-01,
        -1.0000e+09, -1.7591e-01, -2.7788e-01, -1.0000e+09, -5.3662e-02,
        -3.6822e-01, -1.0000e+09, -2.1012e-01, -3.3107e-01, -1.0000e+09,
        -2.8299e-01, -5.1984e-01, -1.0000e+09, -2.9988e-01, -3.4121e-01,
        -1.0000e+09,  1.5462e-02, -3.7285e-01, -1.0000e+09, -1.0184e-02,
        -3.7285e-01, -1.0000e+09, -1.0184e-02, -3.7285e-01, -1.0000e+09,
        -1.0184e-02, -3.7285e-01, -1.0000e+09, -1.0184e-02, -3.7285e-01,
        -1.0000e+09, -1.0184e-02

Tokens embeddings:
tensor([-1.0233e-02, -3.7905e-01, -1.0000e+09, -3.0585e-01, -4.0913e-01,
        -1.0000e+09, -1.9113e-01, -1.8526e-01, -1.0000e+09, -3.1795e-01,
        -3.3419e-01, -1.0000e+09, -3.4240e-01, -2.7229e-01, -1.0000e+09,
        -3.2254e-01, -1.9946e-01, -1.0000e+09, -3.1165e-01, -4.3727e-01,
        -1.0000e+09, -3.5217e-01, -4.5852e-01, -1.0000e+09, -2.3715e-01,
        -2.9212e-01, -1.0000e+09, -1.9113e-01, -4.1190e-01, -1.0000e+09,
        -1.1230e-01, -3.2390e-01, -3.0318e-01, -1.6172e-01, -4.0092e-01,
        -1.0000e+09, -1.0184e-02, -3.7285e-01, -1.0000e+09, -1.0184e-02,
        -3.7285e-01, -1.0000e+09, -1.0184e-02, -3.7285e-01, -1.0000e+09,
        -1.0184e-02, -3.7285e-01, -1.0000e+09, -1.0184e-02, -3.7285e-01,
        -1.0000e+09, -1.0184e-02, -3.7285e-01, -1.0000e+09, -1.0184e-02,
        -3.7285e-01, -1.0000e+09, -1.0184e-02, -3.7285e-01, -1.0000e+09,
        -1.0184e-02, -3.7285e-01, -1.0000e+09, -1.0184e-02, -3.7285e-01,
        -1.0000e+09, -1.0184e-02

Tokens embeddings:
tensor([-1.0233e-02, -3.7905e-01, -1.0000e+09, -3.0585e-01, -4.0913e-01,
        -1.0000e+09, -1.9113e-01, -1.8526e-01, -1.0000e+09, -3.1795e-01,
        -3.3419e-01, -1.0000e+09, -3.4240e-01, -2.7229e-01, -1.0000e+09,
        -3.2254e-01, -1.9946e-01, -1.0000e+09, -3.1165e-01, -4.3727e-01,
        -1.0000e+09, -3.5217e-01, -4.5852e-01, -1.0000e+09, -2.3715e-01,
        -2.9212e-01, -1.0000e+09, -1.9113e-01, -4.1190e-01, -1.0000e+09,
        -1.1230e-01, -3.2390e-01, -3.0318e-01, -1.6172e-01, -4.0092e-01,
        -1.0000e+09, -1.0184e-02, -3.7285e-01, -1.0000e+09, -1.0184e-02,
        -3.7285e-01, -1.0000e+09, -1.0184e-02, -3.7285e-01, -1.0000e+09,
        -1.0184e-02, -3.7285e-01, -1.0000e+09, -1.0184e-02, -3.7285e-01,
        -1.0000e+09, -1.0184e-02, -3.7285e-01, -1.0000e+09, -1.0184e-02,
        -3.7285e-01, -1.0000e+09, -1.0184e-02, -3.7285e-01, -1.0000e+09,
        -1.0184e-02, -3.7285e-01, -1.0000e+09, -1.0184e-02, -3.7285e-01,
        -1.0000e+09, -1.0184e-02

Tokens embeddings:
tensor([-1.0233e-02, -3.7905e-01, -1.0000e+09, -3.0585e-01, -4.0913e-01,
        -1.0000e+09, -1.9113e-01, -1.8526e-01, -1.0000e+09, -3.1795e-01,
        -3.3419e-01, -1.0000e+09, -3.4240e-01, -2.7229e-01, -1.0000e+09,
        -3.2254e-01, -1.9946e-01, -1.0000e+09, -3.1165e-01, -4.3727e-01,
        -1.0000e+09, -3.5217e-01, -4.5852e-01, -1.0000e+09, -2.3715e-01,
        -2.9212e-01, -1.0000e+09, -1.9113e-01, -4.1190e-01, -1.0000e+09,
        -1.1230e-01, -3.2390e-01, -3.0318e-01, -1.6172e-01, -4.0092e-01,
        -1.0000e+09, -1.0184e-02, -3.7285e-01, -1.0000e+09, -1.0184e-02,
        -3.7285e-01, -1.0000e+09, -1.0184e-02, -3.7285e-01, -1.0000e+09,
        -1.0184e-02, -3.7285e-01, -1.0000e+09, -1.0184e-02, -3.7285e-01,
        -1.0000e+09, -1.0184e-02, -3.7285e-01, -1.0000e+09, -1.0184e-02,
        -3.7285e-01, -1.0000e+09, -1.0184e-02, -3.7285e-01, -1.0000e+09,
        -1.0184e-02, -3.7285e-01, -1.0000e+09, -1.0184e-02, -3.7285e-01,
        -1.0000e+09, -1.0184e-02

Tokens embeddings:
tensor([-1.0233e-02, -3.7905e-01, -1.0000e+09, -3.0585e-01, -4.0913e-01,
        -1.0000e+09, -1.9113e-01, -1.8526e-01, -1.0000e+09, -3.1795e-01,
        -3.3419e-01, -1.0000e+09, -3.4240e-01, -2.7229e-01, -1.0000e+09,
        -3.2254e-01, -1.9946e-01, -1.0000e+09, -3.1165e-01, -4.3727e-01,
        -1.0000e+09, -3.5217e-01, -4.5852e-01, -1.0000e+09, -2.3715e-01,
        -2.9212e-01, -1.0000e+09, -1.9113e-01, -4.1190e-01, -1.0000e+09,
        -1.1230e-01, -3.2390e-01, -3.0318e-01, -1.6172e-01, -4.0092e-01,
        -1.0000e+09, -1.0184e-02, -3.7285e-01, -1.0000e+09, -1.0184e-02,
        -3.7285e-01, -1.0000e+09, -1.0184e-02, -3.7285e-01, -1.0000e+09,
        -1.0184e-02, -3.7285e-01, -1.0000e+09, -1.0184e-02, -3.7285e-01,
        -1.0000e+09, -1.0184e-02, -3.7285e-01, -1.0000e+09, -1.0184e-02,
        -3.7285e-01, -1.0000e+09, -1.0184e-02, -3.7285e-01, -1.0000e+09,
        -1.0184e-02, -3.7285e-01, -1.0000e+09, -1.0184e-02, -3.7285e-01,
        -1.0000e+09, -1.0184e-02

Tokens embeddings:
tensor([-1.0233e-02, -3.7905e-01, -1.0000e+09, -3.0585e-01, -4.0913e-01,
        -1.0000e+09, -1.9113e-01, -1.8526e-01, -1.0000e+09, -3.1795e-01,
        -3.3419e-01, -1.0000e+09, -3.4240e-01, -2.7229e-01, -1.0000e+09,
        -3.2254e-01, -1.9946e-01, -1.0000e+09, -3.1165e-01, -4.3727e-01,
        -1.0000e+09, -3.5217e-01, -4.5852e-01, -1.0000e+09, -2.3715e-01,
        -2.9212e-01, -1.0000e+09, -1.9113e-01, -4.1190e-01, -1.0000e+09,
        -1.1230e-01, -3.2390e-01, -3.0318e-01, -1.6172e-01, -4.0092e-01,
        -1.0000e+09, -1.0184e-02, -3.7285e-01, -1.0000e+09, -1.0184e-02,
        -3.7285e-01, -1.0000e+09, -1.0184e-02, -3.7285e-01, -1.0000e+09,
        -1.0184e-02, -3.7285e-01, -1.0000e+09, -1.0184e-02, -3.7285e-01,
        -1.0000e+09, -1.0184e-02, -3.7285e-01, -1.0000e+09, -1.0184e-02,
        -3.7285e-01, -1.0000e+09, -1.0184e-02, -3.7285e-01, -1.0000e+09,
        -1.0184e-02, -3.7285e-01, -1.0000e+09, -1.0184e-02, -3.7285e-01,
        -1.0000e+09, -1.0184e-02

Tokens embeddings:
tensor([-1.0233e-02, -3.7905e-01, -1.0000e+09, -3.0585e-01, -4.0913e-01,
        -1.0000e+09, -1.9113e-01, -1.8526e-01, -1.0000e+09, -3.1795e-01,
        -3.3419e-01, -1.0000e+09, -3.4240e-01, -2.7229e-01, -1.0000e+09,
        -3.2254e-01, -1.9946e-01, -1.0000e+09, -3.1165e-01, -4.3727e-01,
        -1.0000e+09, -3.5217e-01, -4.5852e-01, -1.0000e+09, -2.3715e-01,
        -2.9212e-01, -1.0000e+09, -1.9113e-01, -4.1190e-01, -1.0000e+09,
        -1.1230e-01, -3.2390e-01, -3.0318e-01, -1.6172e-01, -4.0092e-01,
        -1.0000e+09, -1.0184e-02, -3.7285e-01, -1.0000e+09, -1.0184e-02,
        -3.7285e-01, -1.0000e+09, -1.0184e-02, -3.7285e-01, -1.0000e+09,
        -1.0184e-02, -3.7285e-01, -1.0000e+09, -1.0184e-02, -3.7285e-01,
        -1.0000e+09, -1.0184e-02, -3.7285e-01, -1.0000e+09, -1.0184e-02,
        -3.7285e-01, -1.0000e+09, -1.0184e-02, -3.7285e-01, -1.0000e+09,
        -1.0184e-02, -3.7285e-01, -1.0000e+09, -1.0184e-02, -3.7285e-01,
        -1.0000e+09, -1.0184e-02

Tokens embeddings:
tensor([-1.0233e-02, -3.7905e-01, -1.0000e+09, -3.0585e-01, -4.0913e-01,
        -1.0000e+09, -1.9113e-01, -1.8526e-01, -1.0000e+09, -3.1795e-01,
        -3.3419e-01, -1.0000e+09, -3.4240e-01, -2.7229e-01, -1.0000e+09,
        -3.2254e-01, -1.9946e-01, -1.0000e+09, -3.1165e-01, -4.3727e-01,
        -1.0000e+09, -3.5217e-01, -4.5852e-01, -1.0000e+09, -2.3715e-01,
        -2.9212e-01, -1.0000e+09, -1.9113e-01, -4.1190e-01, -1.0000e+09,
        -1.1230e-01, -3.2390e-01, -3.0318e-01, -1.6172e-01, -4.0092e-01,
        -1.0000e+09, -1.0184e-02, -3.7285e-01, -1.0000e+09, -1.0184e-02,
        -3.7285e-01, -1.0000e+09, -1.0184e-02, -3.7285e-01, -1.0000e+09,
        -1.0184e-02, -3.7285e-01, -1.0000e+09, -1.0184e-02, -3.7285e-01,
        -1.0000e+09, -1.0184e-02, -3.7285e-01, -1.0000e+09, -1.0184e-02,
        -3.7285e-01, -1.0000e+09, -1.0184e-02, -3.7285e-01, -1.0000e+09,
        -1.0184e-02, -3.7285e-01, -1.0000e+09, -1.0184e-02, -3.7285e-01,
        -1.0000e+09, -1.0184e-02

Tokens embeddings:
tensor([-1.0233e-02, -3.7905e-01, -1.0000e+09, -3.0585e-01, -4.0913e-01,
        -1.0000e+09, -1.9113e-01, -1.8526e-01, -1.0000e+09, -3.1795e-01,
        -3.3419e-01, -1.0000e+09, -3.4240e-01, -2.7229e-01, -1.0000e+09,
        -3.2254e-01, -1.9946e-01, -1.0000e+09, -3.1165e-01, -4.3727e-01,
        -1.0000e+09, -3.5217e-01, -4.5852e-01, -1.0000e+09, -2.3715e-01,
        -2.9212e-01, -1.0000e+09, -1.9113e-01, -4.1190e-01, -1.0000e+09,
        -1.1230e-01, -3.2390e-01, -3.0318e-01, -1.6172e-01, -4.0092e-01,
        -1.0000e+09, -1.0184e-02, -3.7285e-01, -1.0000e+09, -1.0184e-02,
        -3.7285e-01, -1.0000e+09, -1.0184e-02, -3.7285e-01, -1.0000e+09,
        -1.0184e-02, -3.7285e-01, -1.0000e+09, -1.0184e-02, -3.7285e-01,
        -1.0000e+09, -1.0184e-02, -3.7285e-01, -1.0000e+09, -1.0184e-02,
        -3.7285e-01, -1.0000e+09, -1.0184e-02, -3.7285e-01, -1.0000e+09,
        -1.0184e-02, -3.7285e-01, -1.0000e+09, -1.0184e-02, -3.7285e-01,
        -1.0000e+09, -1.0184e-02

Tokens embeddings:
tensor([-1.0233e-02, -3.7905e-01, -1.0000e+09, -3.0585e-01, -4.0913e-01,
        -1.0000e+09, -1.9113e-01, -1.8526e-01, -1.0000e+09, -3.1795e-01,
        -3.3419e-01, -1.0000e+09, -3.4240e-01, -2.7229e-01, -1.0000e+09,
        -3.2254e-01, -1.9946e-01, -1.0000e+09, -3.1165e-01, -4.3727e-01,
        -1.0000e+09, -3.5217e-01, -4.5852e-01, -1.0000e+09, -2.3715e-01,
        -2.9212e-01, -1.0000e+09, -1.9113e-01, -4.1190e-01, -1.0000e+09,
        -1.1230e-01, -3.2390e-01, -3.0318e-01, -1.6172e-01, -4.0092e-01,
        -1.0000e+09, -1.0184e-02, -3.7285e-01, -1.0000e+09, -1.0184e-02,
        -3.7285e-01, -1.0000e+09, -1.0184e-02, -3.7285e-01, -1.0000e+09,
        -1.0184e-02, -3.7285e-01, -1.0000e+09, -1.0184e-02, -3.7285e-01,
        -1.0000e+09, -1.0184e-02, -3.7285e-01, -1.0000e+09, -1.0184e-02,
        -3.7285e-01, -1.0000e+09, -1.0184e-02, -3.7285e-01, -1.0000e+09,
        -1.0184e-02, -3.7285e-01, -1.0000e+09, -1.0184e-02, -3.7285e-01,
        -1.0000e+09, -1.0184e-02

Tokens embeddings:
tensor([-1.0233e-02, -3.7905e-01, -1.0000e+09, -3.0585e-01, -4.0913e-01,
        -1.0000e+09, -1.9113e-01, -1.8526e-01, -1.0000e+09, -3.1795e-01,
        -3.3419e-01, -1.0000e+09, -3.4240e-01, -2.7229e-01, -1.0000e+09,
        -3.2254e-01, -1.9946e-01, -1.0000e+09, -3.1165e-01, -4.3727e-01,
        -1.0000e+09, -3.5217e-01, -4.5852e-01, -1.0000e+09, -2.3715e-01,
        -2.9212e-01, -1.0000e+09, -1.9113e-01, -4.1190e-01, -1.0000e+09,
        -1.1230e-01, -3.2390e-01, -3.0318e-01, -1.6172e-01, -4.0092e-01,
        -1.0000e+09, -1.0184e-02, -3.7285e-01, -1.0000e+09, -1.0184e-02,
        -3.7285e-01, -1.0000e+09, -1.0184e-02, -3.7285e-01, -1.0000e+09,
        -1.0184e-02, -3.7285e-01, -1.0000e+09, -1.0184e-02, -3.7285e-01,
        -1.0000e+09, -1.0184e-02, -3.7285e-01, -1.0000e+09, -1.0184e-02,
        -3.7285e-01, -1.0000e+09, -1.0184e-02, -3.7285e-01, -1.0000e+09,
        -1.0184e-02, -3.7285e-01, -1.0000e+09, -1.0184e-02, -3.7285e-01,
        -1.0000e+09, -1.0184e-02

Tokens embeddings:
tensor([-1.0233e-02, -3.7905e-01, -1.0000e+09, -3.0585e-01, -4.0913e-01,
        -1.0000e+09, -1.9113e-01, -1.8526e-01, -1.0000e+09, -3.1795e-01,
        -3.3419e-01, -1.0000e+09, -3.4240e-01, -2.7229e-01, -1.0000e+09,
        -3.2254e-01, -1.9946e-01, -1.0000e+09, -3.1165e-01, -4.3727e-01,
        -1.0000e+09, -3.5217e-01, -4.5852e-01, -1.0000e+09, -2.3715e-01,
        -2.9212e-01, -1.0000e+09, -1.9113e-01, -4.1190e-01, -1.0000e+09,
        -1.1230e-01, -3.2390e-01, -3.0318e-01, -1.6172e-01, -4.0092e-01,
        -1.0000e+09, -1.0184e-02, -3.7285e-01, -1.0000e+09, -1.0184e-02,
        -3.7285e-01, -1.0000e+09, -1.0184e-02, -3.7285e-01, -1.0000e+09,
        -1.0184e-02, -3.7285e-01, -1.0000e+09, -1.0184e-02, -3.7285e-01,
        -1.0000e+09, -1.0184e-02, -3.7285e-01, -1.0000e+09, -1.0184e-02,
        -3.7285e-01, -1.0000e+09, -1.0184e-02, -3.7285e-01, -1.0000e+09,
        -1.0184e-02, -3.7285e-01, -1.0000e+09, -1.0184e-02, -3.7285e-01,
        -1.0000e+09, -1.0184e-02

Tokens embeddings:
tensor([-1.0233e-02, -3.7905e-01, -1.0000e+09, -3.0585e-01, -4.0913e-01,
        -1.0000e+09, -1.9113e-01, -1.8526e-01, -1.0000e+09, -3.1795e-01,
        -3.3419e-01, -1.0000e+09, -3.4240e-01, -2.7229e-01, -1.0000e+09,
        -3.2254e-01, -1.9946e-01, -1.0000e+09, -3.1165e-01, -4.3727e-01,
        -1.0000e+09, -3.5217e-01, -4.5852e-01, -1.0000e+09, -2.3715e-01,
        -2.9212e-01, -1.0000e+09, -1.9113e-01, -4.1190e-01, -1.0000e+09,
        -1.1230e-01, -3.2390e-01, -3.0318e-01, -1.6172e-01, -4.0092e-01,
        -1.0000e+09, -1.0184e-02, -3.7285e-01, -1.0000e+09, -1.0184e-02,
        -3.7285e-01, -1.0000e+09, -1.0184e-02, -3.7285e-01, -1.0000e+09,
        -1.0184e-02, -3.7285e-01, -1.0000e+09, -1.0184e-02, -3.7285e-01,
        -1.0000e+09, -1.0184e-02, -3.7285e-01, -1.0000e+09, -1.0184e-02,
        -3.7285e-01, -1.0000e+09, -1.0184e-02, -3.7285e-01, -1.0000e+09,
        -1.0184e-02, -3.7285e-01, -1.0000e+09, -1.0184e-02, -3.7285e-01,
        -1.0000e+09, -1.0184e-02

Tokens embeddings:
tensor([-1.0233e-02, -4.8384e-01, -1.4143e-01, -1.7941e-01, -1.0000e+09,
        -3.3048e-01, -2.7985e-01, -1.3698e-01, -2.3214e-01, -9.0318e-02,
        -2.8659e-01, -3.3423e-01, -2.1613e-01, -3.2243e-01, -1.0000e+09,
        -1.3248e-01, -2.9776e-01, -3.0499e-01, -2.4285e-01, -1.0000e+09,
        -1.7591e-01, -4.3120e-01, -3.4645e-01, -2.6347e-01, -1.0000e+09,
        -3.5217e-01, -2.8613e-01, -1.1230e-01, -2.8126e-01, -1.0000e+09,
        -2.5530e-01, -2.7229e-01, -2.5963e-01, -3.5217e-01, -1.0000e+09,
         1.5462e-02, -3.7285e-01, -1.0184e-02, -1.0184e-02, -1.0000e+09,
        -1.0184e-02, -3.7285e-01, -1.0184e-02, -1.0184e-02, -1.0000e+09,
        -1.0184e-02, -3.7285e-01, -1.0184e-02, -1.0184e-02, -1.0000e+09,
        -1.0184e-02, -3.7285e-01, -1.0184e-02, -1.0184e-02, -1.0000e+09,
        -1.0184e-02, -3.7285e-01, -1.0184e-02, -1.0184e-02, -1.0000e+09,
        -1.0184e-02, -3.7285e-01, -1.0184e-02, -1.0184e-02, -1.0000e+09,
        -1.0184e-02, -3.7285e-01

Tokens embeddings:
tensor([-1.0233e-02, -4.8384e-01, -1.4143e-01, -1.7941e-01, -1.0000e+09,
        -3.3048e-01, -2.7985e-01, -1.3698e-01, -2.3214e-01, -9.0318e-02,
        -2.8659e-01, -3.3423e-01, -2.1613e-01, -3.2243e-01, -1.0000e+09,
        -1.3248e-01, -2.9776e-01, -3.0499e-01, -2.4285e-01, -1.0000e+09,
        -1.7591e-01, -4.3120e-01, -3.4645e-01, -2.6347e-01, -1.0000e+09,
        -3.5217e-01, -2.8613e-01, -1.1230e-01, -2.8126e-01, -1.0000e+09,
        -2.5530e-01, -2.7229e-01, -2.5963e-01, -3.5217e-01, -1.0000e+09,
         1.5462e-02, -3.7285e-01, -1.0184e-02, -1.0184e-02, -1.0000e+09,
        -1.0184e-02, -3.7285e-01, -1.0184e-02, -1.0184e-02, -1.0000e+09,
        -1.0184e-02, -3.7285e-01, -1.0184e-02, -1.0184e-02, -1.0000e+09,
        -1.0184e-02, -3.7285e-01, -1.0184e-02, -1.0184e-02, -1.0000e+09,
        -1.0184e-02, -3.7285e-01, -1.0184e-02, -1.0184e-02, -1.0000e+09,
        -1.0184e-02, -3.7285e-01, -1.0184e-02, -1.0184e-02, -1.0000e+09,
        -1.0184e-02, -3.7285e-01

Tokens embeddings:
tensor([-1.0233e-02, -4.8384e-01, -1.4143e-01, -1.7941e-01, -1.0000e+09,
        -3.3048e-01, -2.7985e-01, -1.3698e-01, -2.3214e-01, -9.0318e-02,
        -2.8659e-01, -3.3423e-01, -2.1613e-01, -3.2243e-01, -1.0000e+09,
        -1.3248e-01, -2.9776e-01, -3.0499e-01, -2.4285e-01, -1.0000e+09,
        -1.7591e-01, -4.3120e-01, -3.4645e-01, -2.6347e-01, -1.0000e+09,
        -3.5217e-01, -2.8613e-01, -1.1230e-01, -2.8126e-01, -1.0000e+09,
        -2.5530e-01, -2.7229e-01, -2.5963e-01, -3.5217e-01, -1.0000e+09,
         1.5462e-02, -3.7285e-01, -1.0184e-02, -1.0184e-02, -1.0000e+09,
        -1.0184e-02, -3.7285e-01, -1.0184e-02, -1.0184e-02, -1.0000e+09,
        -1.0184e-02, -3.7285e-01, -1.0184e-02, -1.0184e-02, -1.0000e+09,
        -1.0184e-02, -3.7285e-01, -1.0184e-02, -1.0184e-02, -1.0000e+09,
        -1.0184e-02, -3.7285e-01, -1.0184e-02, -1.0184e-02, -1.0000e+09,
        -1.0184e-02, -3.7285e-01, -1.0184e-02, -1.0184e-02, -1.0000e+09,
        -1.0184e-02, -3.7285e-01

Tokens embeddings:
tensor([-1.0233e-02, -4.8384e-01, -1.4143e-01, -1.7941e-01, -1.0000e+09,
        -3.3048e-01, -2.7985e-01, -1.3698e-01, -2.3214e-01, -9.0318e-02,
        -2.8659e-01, -3.3423e-01, -2.1613e-01, -3.2243e-01, -1.0000e+09,
        -1.3248e-01, -2.9776e-01, -3.0499e-01, -2.4285e-01, -1.0000e+09,
        -1.7591e-01, -4.3120e-01, -3.4645e-01, -2.6347e-01, -1.0000e+09,
        -3.5217e-01, -2.8613e-01, -1.1230e-01, -2.8126e-01, -1.0000e+09,
        -2.5530e-01, -2.7229e-01, -2.5963e-01, -3.5217e-01, -1.0000e+09,
         1.5462e-02, -3.7285e-01, -1.0184e-02, -1.0184e-02, -1.0000e+09,
        -1.0184e-02, -3.7285e-01, -1.0184e-02, -1.0184e-02, -1.0000e+09,
        -1.0184e-02, -3.7285e-01, -1.0184e-02, -1.0184e-02, -1.0000e+09,
        -1.0184e-02, -3.7285e-01, -1.0184e-02, -1.0184e-02, -1.0000e+09,
        -1.0184e-02, -3.7285e-01, -1.0184e-02, -1.0184e-02, -1.0000e+09,
        -1.0184e-02, -3.7285e-01, -1.0184e-02, -1.0184e-02, -1.0000e+09,
        -1.0184e-02, -3.7285e-01

Tokens embeddings:
tensor([-1.0233e-02, -4.8384e-01, -1.4143e-01, -1.7941e-01, -1.0000e+09,
        -3.3048e-01, -2.7985e-01, -1.3698e-01, -2.3214e-01, -9.0318e-02,
        -2.8659e-01, -3.3423e-01, -2.1613e-01, -3.2243e-01, -1.0000e+09,
        -1.3248e-01, -2.9776e-01, -3.0499e-01, -2.4285e-01, -1.0000e+09,
        -1.7591e-01, -4.3120e-01, -3.4645e-01, -2.6347e-01, -1.0000e+09,
        -3.5217e-01, -2.8613e-01, -1.1230e-01, -2.8126e-01, -1.0000e+09,
        -2.5530e-01, -2.7229e-01, -2.5963e-01, -3.5217e-01, -1.0000e+09,
         1.5462e-02, -3.7285e-01, -1.0184e-02, -1.0184e-02, -1.0000e+09,
        -1.0184e-02, -3.7285e-01, -1.0184e-02, -1.0184e-02, -1.0000e+09,
        -1.0184e-02, -3.7285e-01, -1.0184e-02, -1.0184e-02, -1.0000e+09,
        -1.0184e-02, -3.7285e-01, -1.0184e-02, -1.0184e-02, -1.0000e+09,
        -1.0184e-02, -3.7285e-01, -1.0184e-02, -1.0184e-02, -1.0000e+09,
        -1.0184e-02, -3.7285e-01, -1.0184e-02, -1.0184e-02, -1.0000e+09,
        -1.0184e-02, -3.7285e-01

Tokens embeddings:
tensor([-1.0233e-02, -4.8384e-01, -1.4143e-01, -1.7941e-01, -1.0000e+09,
        -3.3048e-01, -2.7985e-01, -1.3698e-01, -2.3214e-01, -9.0318e-02,
        -2.8659e-01, -3.3423e-01, -2.1613e-01, -3.2243e-01, -1.0000e+09,
        -1.3248e-01, -2.9776e-01, -3.0499e-01, -2.4285e-01, -1.0000e+09,
        -1.7591e-01, -4.3120e-01, -3.4645e-01, -2.6347e-01, -1.0000e+09,
        -3.5217e-01, -2.8613e-01, -1.1230e-01, -2.8126e-01, -1.0000e+09,
        -2.5530e-01, -2.7229e-01, -2.5963e-01, -3.5217e-01, -1.0000e+09,
         1.5462e-02, -3.7285e-01, -1.0184e-02, -1.0184e-02, -1.0000e+09,
        -1.0184e-02, -3.7285e-01, -1.0184e-02, -1.0184e-02, -1.0000e+09,
        -1.0184e-02, -3.7285e-01, -1.0184e-02, -1.0184e-02, -1.0000e+09,
        -1.0184e-02, -3.7285e-01, -1.0184e-02, -1.0184e-02, -1.0000e+09,
        -1.0184e-02, -3.7285e-01, -1.0184e-02, -1.0184e-02, -1.0000e+09,
        -1.0184e-02, -3.7285e-01, -1.0184e-02, -1.0184e-02, -1.0000e+09,
        -1.0184e-02, -3.7285e-01

Tokens embeddings:
tensor([-1.0233e-02, -4.8384e-01, -1.4143e-01, -1.7941e-01, -1.0000e+09,
        -3.3048e-01, -2.7985e-01, -1.3698e-01, -2.3214e-01, -9.0318e-02,
        -2.8659e-01, -3.3423e-01, -2.1613e-01, -3.2243e-01, -1.0000e+09,
        -1.3248e-01, -2.9776e-01, -3.0499e-01, -2.4285e-01, -1.0000e+09,
        -1.7591e-01, -4.3120e-01, -3.4645e-01, -2.6347e-01, -1.0000e+09,
        -3.5217e-01, -2.8613e-01, -1.1230e-01, -2.8126e-01, -1.0000e+09,
        -2.5530e-01, -2.7229e-01, -2.5963e-01, -3.5217e-01, -1.0000e+09,
         1.5462e-02, -3.7285e-01, -1.0184e-02, -1.0184e-02, -1.0000e+09,
        -1.0184e-02, -3.7285e-01, -1.0184e-02, -1.0184e-02, -1.0000e+09,
        -1.0184e-02, -3.7285e-01, -1.0184e-02, -1.0184e-02, -1.0000e+09,
        -1.0184e-02, -3.7285e-01, -1.0184e-02, -1.0184e-02, -1.0000e+09,
        -1.0184e-02, -3.7285e-01, -1.0184e-02, -1.0184e-02, -1.0000e+09,
        -1.0184e-02, -3.7285e-01, -1.0184e-02, -1.0184e-02, -1.0000e+09,
        -1.0184e-02, -3.7285e-01

Tokens embeddings:
tensor([-1.0233e-02, -4.8384e-01, -1.4143e-01, -1.7941e-01, -1.0000e+09,
        -3.3048e-01, -2.7985e-01, -1.3698e-01, -2.3214e-01, -9.0318e-02,
        -2.8659e-01, -3.3423e-01, -2.1613e-01, -3.2243e-01, -1.0000e+09,
        -1.3248e-01, -2.9776e-01, -3.0499e-01, -2.4285e-01, -1.0000e+09,
        -1.7591e-01, -4.3120e-01, -3.4645e-01, -2.6347e-01, -1.0000e+09,
        -3.5217e-01, -2.8613e-01, -1.1230e-01, -2.8126e-01, -1.0000e+09,
        -2.5530e-01, -2.7229e-01, -2.5963e-01, -3.5217e-01, -1.0000e+09,
         1.5462e-02, -3.7285e-01, -1.0184e-02, -1.0184e-02, -1.0000e+09,
        -1.0184e-02, -3.7285e-01, -1.0184e-02, -1.0184e-02, -1.0000e+09,
        -1.0184e-02, -3.7285e-01, -1.0184e-02, -1.0184e-02, -1.0000e+09,
        -1.0184e-02, -3.7285e-01, -1.0184e-02, -1.0184e-02, -1.0000e+09,
        -1.0184e-02, -3.7285e-01, -1.0184e-02, -1.0184e-02, -1.0000e+09,
        -1.0184e-02, -3.7285e-01, -1.0184e-02, -1.0184e-02, -1.0000e+09,
        -1.0184e-02, -3.7285e-01

Tokens embeddings:
tensor([-1.0233e-02, -4.8384e-01, -1.4143e-01, -1.7941e-01, -1.0000e+09,
        -3.3048e-01, -2.7985e-01, -1.3698e-01, -2.3214e-01, -9.0318e-02,
        -2.8659e-01, -3.3423e-01, -2.1613e-01, -3.2243e-01, -1.0000e+09,
        -1.3248e-01, -2.9776e-01, -3.0499e-01, -2.4285e-01, -1.0000e+09,
        -1.7591e-01, -4.3120e-01, -3.4645e-01, -2.6347e-01, -1.0000e+09,
        -3.5217e-01, -2.8613e-01, -1.1230e-01, -2.8126e-01, -1.0000e+09,
        -2.5530e-01, -2.7229e-01, -2.5963e-01, -3.5217e-01, -1.0000e+09,
         1.5462e-02, -3.7285e-01, -1.0184e-02, -1.0184e-02, -1.0000e+09,
        -1.0184e-02, -3.7285e-01, -1.0184e-02, -1.0184e-02, -1.0000e+09,
        -1.0184e-02, -3.7285e-01, -1.0184e-02, -1.0184e-02, -1.0000e+09,
        -1.0184e-02, -3.7285e-01, -1.0184e-02, -1.0184e-02, -1.0000e+09,
        -1.0184e-02, -3.7285e-01, -1.0184e-02, -1.0184e-02, -1.0000e+09,
        -1.0184e-02, -3.7285e-01, -1.0184e-02, -1.0184e-02, -1.0000e+09,
        -1.0184e-02, -3.7285e-01

Tokens embeddings:
tensor([-1.0233e-02, -4.8384e-01, -1.4143e-01, -1.7941e-01, -1.0000e+09,
        -3.3048e-01, -2.7985e-01, -1.3698e-01, -2.3214e-01, -9.0318e-02,
        -2.8659e-01, -3.3423e-01, -2.1613e-01, -3.2243e-01, -1.0000e+09,
        -1.3248e-01, -2.9776e-01, -3.0499e-01, -2.4285e-01, -1.0000e+09,
        -1.7591e-01, -4.3120e-01, -3.4645e-01, -2.6347e-01, -1.0000e+09,
        -3.5217e-01, -2.8613e-01, -1.1230e-01, -2.8126e-01, -1.0000e+09,
        -2.5530e-01, -2.7229e-01, -2.5963e-01, -3.5217e-01, -1.0000e+09,
         1.5462e-02, -3.7285e-01, -1.0184e-02, -1.0184e-02, -1.0000e+09,
        -1.0184e-02, -3.7285e-01, -1.0184e-02, -1.0184e-02, -1.0000e+09,
        -1.0184e-02, -3.7285e-01, -1.0184e-02, -1.0184e-02, -1.0000e+09,
        -1.0184e-02, -3.7285e-01, -1.0184e-02, -1.0184e-02, -1.0000e+09,
        -1.0184e-02, -3.7285e-01, -1.0184e-02, -1.0184e-02, -1.0000e+09,
        -1.0184e-02, -3.7285e-01, -1.0184e-02, -1.0184e-02, -1.0000e+09,
        -1.0184e-02, -3.7285e-01

Tokens embeddings:
tensor([-1.0233e-02, -4.8384e-01, -1.4143e-01, -1.7941e-01, -1.0000e+09,
        -3.3048e-01, -2.7985e-01, -1.3698e-01, -2.3214e-01, -9.0318e-02,
        -2.8659e-01, -3.3423e-01, -2.1613e-01, -3.2243e-01, -1.0000e+09,
        -1.3248e-01, -2.9776e-01, -3.0499e-01, -2.4285e-01, -1.0000e+09,
        -1.7591e-01, -4.3120e-01, -3.4645e-01, -2.6347e-01, -1.0000e+09,
        -3.5217e-01, -2.8613e-01, -1.1230e-01, -2.8126e-01, -1.0000e+09,
        -2.5530e-01, -2.7229e-01, -2.5963e-01, -3.5217e-01, -1.0000e+09,
         1.5462e-02, -3.7285e-01, -1.0184e-02, -1.0184e-02, -1.0000e+09,
        -1.0184e-02, -3.7285e-01, -1.0184e-02, -1.0184e-02, -1.0000e+09,
        -1.0184e-02, -3.7285e-01, -1.0184e-02, -1.0184e-02, -1.0000e+09,
        -1.0184e-02, -3.7285e-01, -1.0184e-02, -1.0184e-02, -1.0000e+09,
        -1.0184e-02, -3.7285e-01, -1.0184e-02, -1.0184e-02, -1.0000e+09,
        -1.0184e-02, -3.7285e-01, -1.0184e-02, -1.0184e-02, -1.0000e+09,
        -1.0184e-02, -3.7285e-01

Tokens embeddings:
tensor([-1.0233e-02, -4.8384e-01, -1.4143e-01, -1.7941e-01, -1.0000e+09,
        -3.3048e-01, -2.7985e-01, -1.3698e-01, -2.3214e-01, -9.0318e-02,
        -2.8659e-01, -3.3423e-01, -2.1613e-01, -3.2243e-01, -1.0000e+09,
        -1.3248e-01, -2.9776e-01, -3.0499e-01, -2.4285e-01, -1.0000e+09,
        -1.7591e-01, -4.3120e-01, -3.4645e-01, -2.6347e-01, -1.0000e+09,
        -3.5217e-01, -2.8613e-01, -1.1230e-01, -2.8126e-01, -1.0000e+09,
        -2.5530e-01, -2.7229e-01, -2.5963e-01, -3.5217e-01, -1.0000e+09,
         1.5462e-02, -3.7285e-01, -1.0184e-02, -1.0184e-02, -1.0000e+09,
        -1.0184e-02, -3.7285e-01, -1.0184e-02, -1.0184e-02, -1.0000e+09,
        -1.0184e-02, -3.7285e-01, -1.0184e-02, -1.0184e-02, -1.0000e+09,
        -1.0184e-02, -3.7285e-01, -1.0184e-02, -1.0184e-02, -1.0000e+09,
        -1.0184e-02, -3.7285e-01, -1.0184e-02, -1.0184e-02, -1.0000e+09,
        -1.0184e-02, -3.7285e-01, -1.0184e-02, -1.0184e-02, -1.0000e+09,
        -1.0184e-02, -3.7285e-01

Tokens embeddings:
tensor([-1.0233e-02, -4.1190e-01, -1.0000e+09, -2.8000e-01, -4.7436e-01,
        -1.0000e+09, -3.5814e-01, -4.0058e-01, -1.0000e+09, -9.0318e-02,
        -4.1190e-01, -1.0000e+09, -2.5068e-01, -3.9798e-01, -1.0000e+09,
        -1.6423e-01, -3.7905e-01, -1.0000e+09, -2.7793e-01, -4.5358e-01,
        -1.0000e+09, -9.0318e-02, -4.9015e-01, -1.0000e+09, -2.8299e-01,
        -4.3400e-01, -1.0000e+09, -2.9826e-01, -1.8695e-01, -1.0000e+09,
        -3.5120e-01, -4.3088e-01, -1.0000e+09, -2.2084e-01, -2.4775e-01,
        -1.0000e+09, -2.5646e-01, -8.7963e-02, -1.0000e+09, -9.0318e-02,
        -4.1190e-01, -1.0000e+09, -3.0097e-01, -1.9913e-01, -2.9776e-01,
        -2.6534e-01, -2.9669e-01, -9.9006e-02, -1.5565e-01, -2.8116e-01,
        -1.0000e+09, -1.0184e-02, -3.7285e-01, -1.0000e+09, -1.0184e-02,
        -3.7285e-01, -1.0000e+09, -1.0184e-02, -3.7285e-01, -1.0000e+09,
        -1.0184e-02, -1.0184e-02, -3.7285e-01, -1.0184e-02, -3.7285e-01,
        -1.0000e+09, -1.0184e-02

Tokens embeddings:
tensor([-1.0233e-02, -4.1190e-01, -1.0000e+09, -2.8000e-01, -4.7436e-01,
        -1.0000e+09, -3.5814e-01, -4.0058e-01, -1.0000e+09, -9.0318e-02,
        -4.1190e-01, -1.0000e+09, -2.5068e-01, -3.9798e-01, -1.0000e+09,
        -1.6423e-01, -3.7905e-01, -1.0000e+09, -2.7793e-01, -4.5358e-01,
        -1.0000e+09, -9.0318e-02, -4.9015e-01, -1.0000e+09, -2.8299e-01,
        -4.3400e-01, -1.0000e+09, -2.9826e-01, -1.8695e-01, -1.0000e+09,
        -3.5120e-01, -4.3088e-01, -1.0000e+09, -2.2084e-01, -2.4775e-01,
        -1.0000e+09, -2.5646e-01, -8.7963e-02, -1.0000e+09, -9.0318e-02,
        -4.1190e-01, -1.0000e+09, -3.0097e-01, -1.9913e-01, -2.9776e-01,
        -2.6534e-01, -2.9669e-01, -9.9006e-02, -1.5565e-01, -2.8116e-01,
        -1.0000e+09, -1.0184e-02, -3.7285e-01, -1.0000e+09, -1.0184e-02,
        -3.7285e-01, -1.0000e+09, -1.0184e-02, -3.7285e-01, -1.0000e+09,
        -1.0184e-02, -1.0184e-02, -3.7285e-01, -1.0184e-02, -3.7285e-01,
        -1.0000e+09, -1.0184e-02

Tokens embeddings:
tensor([-1.0233e-02, -4.1190e-01, -1.0000e+09, -2.8000e-01, -4.7436e-01,
        -1.0000e+09, -3.5814e-01, -4.0058e-01, -1.0000e+09, -9.0318e-02,
        -4.1190e-01, -1.0000e+09, -2.5068e-01, -3.9798e-01, -1.0000e+09,
        -1.6423e-01, -3.7905e-01, -1.0000e+09, -2.7793e-01, -4.5358e-01,
        -1.0000e+09, -9.0318e-02, -4.9015e-01, -1.0000e+09, -2.8299e-01,
        -4.3400e-01, -1.0000e+09, -2.9826e-01, -1.8695e-01, -1.0000e+09,
        -3.5120e-01, -4.3088e-01, -1.0000e+09, -2.2084e-01, -2.4775e-01,
        -1.0000e+09, -2.5646e-01, -8.7963e-02, -1.0000e+09, -9.0318e-02,
        -4.1190e-01, -1.0000e+09, -3.0097e-01, -1.9913e-01, -2.9776e-01,
        -2.6534e-01, -2.9669e-01, -9.9006e-02, -1.5565e-01, -2.8116e-01,
        -1.0000e+09, -1.0184e-02, -3.7285e-01, -1.0000e+09, -1.0184e-02,
        -3.7285e-01, -1.0000e+09, -1.0184e-02, -3.7285e-01, -1.0000e+09,
        -1.0184e-02, -1.0184e-02, -3.7285e-01, -1.0184e-02, -3.7285e-01,
        -1.0000e+09, -1.0184e-02

Tokens embeddings:
tensor([-1.0233e-02, -4.1190e-01, -1.0000e+09, -2.8000e-01, -4.7436e-01,
        -1.0000e+09, -3.5814e-01, -4.0058e-01, -1.0000e+09, -9.0318e-02,
        -4.1190e-01, -1.0000e+09, -2.5068e-01, -3.9798e-01, -1.0000e+09,
        -1.6423e-01, -3.7905e-01, -1.0000e+09, -2.7793e-01, -4.5358e-01,
        -1.0000e+09, -9.0318e-02, -4.9015e-01, -1.0000e+09, -2.8299e-01,
        -4.3400e-01, -1.0000e+09, -2.9826e-01, -1.8695e-01, -1.0000e+09,
        -3.5120e-01, -4.3088e-01, -1.0000e+09, -2.2084e-01, -2.4775e-01,
        -1.0000e+09, -2.5646e-01, -8.7963e-02, -1.0000e+09, -9.0318e-02,
        -4.1190e-01, -1.0000e+09, -3.0097e-01, -1.9913e-01, -2.9776e-01,
        -2.6534e-01, -2.9669e-01, -9.9006e-02, -1.5565e-01, -2.8116e-01,
        -1.0000e+09, -1.0184e-02, -3.7285e-01, -1.0000e+09, -1.0184e-02,
        -3.7285e-01, -1.0000e+09, -1.0184e-02, -3.7285e-01, -1.0000e+09,
        -1.0184e-02, -1.0184e-02, -3.7285e-01, -1.0184e-02, -3.7285e-01,
        -1.0000e+09, -1.0184e-02

Tokens embeddings:
tensor([-1.0233e-02, -4.1190e-01, -1.0000e+09, -2.8000e-01, -4.7436e-01,
        -1.0000e+09, -3.5814e-01, -4.0058e-01, -1.0000e+09, -9.0318e-02,
        -4.1190e-01, -1.0000e+09, -2.5068e-01, -3.9798e-01, -1.0000e+09,
        -1.6423e-01, -3.7905e-01, -1.0000e+09, -2.7793e-01, -4.5358e-01,
        -1.0000e+09, -9.0318e-02, -4.9015e-01, -1.0000e+09, -2.8299e-01,
        -4.3400e-01, -1.0000e+09, -2.9826e-01, -1.8695e-01, -1.0000e+09,
        -3.5120e-01, -4.3088e-01, -1.0000e+09, -2.2084e-01, -2.4775e-01,
        -1.0000e+09, -2.5646e-01, -8.7963e-02, -1.0000e+09, -9.0318e-02,
        -4.1190e-01, -1.0000e+09, -3.0097e-01, -1.9913e-01, -2.9776e-01,
        -2.6534e-01, -2.9669e-01, -9.9006e-02, -1.5565e-01, -2.8116e-01,
        -1.0000e+09, -1.0184e-02, -3.7285e-01, -1.0000e+09, -1.0184e-02,
        -3.7285e-01, -1.0000e+09, -1.0184e-02, -3.7285e-01, -1.0000e+09,
        -1.0184e-02, -1.0184e-02, -3.7285e-01, -1.0184e-02, -3.7285e-01,
        -1.0000e+09, -1.0184e-02

Tokens embeddings:
tensor([-1.0233e-02, -4.1190e-01, -1.0000e+09, -2.8000e-01, -4.7436e-01,
        -1.0000e+09, -3.5814e-01, -4.0058e-01, -1.0000e+09, -9.0318e-02,
        -4.1190e-01, -1.0000e+09, -2.5068e-01, -3.9798e-01, -1.0000e+09,
        -1.6423e-01, -3.7905e-01, -1.0000e+09, -2.7793e-01, -4.5358e-01,
        -1.0000e+09, -9.0318e-02, -4.9015e-01, -1.0000e+09, -2.8299e-01,
        -4.3400e-01, -1.0000e+09, -2.9826e-01, -1.8695e-01, -1.0000e+09,
        -3.5120e-01, -4.3088e-01, -1.0000e+09, -2.2084e-01, -2.4775e-01,
        -1.0000e+09, -2.5646e-01, -8.7963e-02, -1.0000e+09, -9.0318e-02,
        -4.1190e-01, -1.0000e+09, -3.0097e-01, -1.9913e-01, -2.9776e-01,
        -2.6534e-01, -2.9669e-01, -9.9006e-02, -1.5565e-01, -2.8116e-01,
        -1.0000e+09, -1.0184e-02, -3.7285e-01, -1.0000e+09, -1.0184e-02,
        -3.7285e-01, -1.0000e+09, -1.0184e-02, -3.7285e-01, -1.0000e+09,
        -1.0184e-02, -1.0184e-02, -3.7285e-01, -1.0184e-02, -3.7285e-01,
        -1.0000e+09, -1.0184e-02

Tokens embeddings:
tensor([-1.0233e-02, -4.1190e-01, -1.0000e+09, -2.8000e-01, -4.7436e-01,
        -1.0000e+09, -3.5814e-01, -4.0058e-01, -1.0000e+09, -9.0318e-02,
        -4.1190e-01, -1.0000e+09, -2.5068e-01, -3.9798e-01, -1.0000e+09,
        -1.6423e-01, -3.7905e-01, -1.0000e+09, -2.7793e-01, -4.5358e-01,
        -1.0000e+09, -9.0318e-02, -4.9015e-01, -1.0000e+09, -2.8299e-01,
        -4.3400e-01, -1.0000e+09, -2.9826e-01, -1.8695e-01, -1.0000e+09,
        -3.5120e-01, -4.3088e-01, -1.0000e+09, -2.2084e-01, -2.4775e-01,
        -1.0000e+09, -2.5646e-01, -8.7963e-02, -1.0000e+09, -9.0318e-02,
        -4.1190e-01, -1.0000e+09, -3.0097e-01, -1.9913e-01, -2.9776e-01,
        -2.6534e-01, -2.9669e-01, -9.9006e-02, -1.5565e-01, -2.8116e-01,
        -1.0000e+09, -1.0184e-02, -3.7285e-01, -1.0000e+09, -1.0184e-02,
        -3.7285e-01, -1.0000e+09, -1.0184e-02, -3.7285e-01, -1.0000e+09,
        -1.0184e-02, -1.0184e-02, -3.7285e-01, -1.0184e-02, -3.7285e-01,
        -1.0000e+09, -1.0184e-02

Tokens embeddings:
tensor([-1.0233e-02, -4.1190e-01, -1.0000e+09, -2.8000e-01, -4.7436e-01,
        -1.0000e+09, -3.5814e-01, -4.0058e-01, -1.0000e+09, -9.0318e-02,
        -4.1190e-01, -1.0000e+09, -2.5068e-01, -3.9798e-01, -1.0000e+09,
        -1.6423e-01, -3.7905e-01, -1.0000e+09, -2.7793e-01, -4.5358e-01,
        -1.0000e+09, -9.0318e-02, -4.9015e-01, -1.0000e+09, -2.8299e-01,
        -4.3400e-01, -1.0000e+09, -2.9826e-01, -1.8695e-01, -1.0000e+09,
        -3.5120e-01, -4.3088e-01, -1.0000e+09, -2.2084e-01, -2.4775e-01,
        -1.0000e+09, -2.5646e-01, -8.7963e-02, -1.0000e+09, -9.0318e-02,
        -4.1190e-01, -1.0000e+09, -3.0097e-01, -1.9913e-01, -2.9776e-01,
        -2.6534e-01, -2.9669e-01, -9.9006e-02, -1.5565e-01, -2.8116e-01,
        -1.0000e+09, -1.0184e-02, -3.7285e-01, -1.0000e+09, -1.0184e-02,
        -3.7285e-01, -1.0000e+09, -1.0184e-02, -3.7285e-01, -1.0000e+09,
        -1.0184e-02, -1.0184e-02, -3.7285e-01, -1.0184e-02, -3.7285e-01,
        -1.0000e+09, -1.0184e-02

Tokens embeddings:
tensor([-1.0233e-02, -4.1190e-01, -1.0000e+09, -2.8000e-01, -4.7436e-01,
        -1.0000e+09, -3.5814e-01, -4.0058e-01, -1.0000e+09, -9.0318e-02,
        -4.1190e-01, -1.0000e+09, -2.5068e-01, -3.9798e-01, -1.0000e+09,
        -1.6423e-01, -3.7905e-01, -1.0000e+09, -2.7793e-01, -4.5358e-01,
        -1.0000e+09, -9.0318e-02, -4.9015e-01, -1.0000e+09, -2.8299e-01,
        -4.3400e-01, -1.0000e+09, -2.9826e-01, -1.8695e-01, -1.0000e+09,
        -3.5120e-01, -4.3088e-01, -1.0000e+09, -2.2084e-01, -2.4775e-01,
        -1.0000e+09, -2.5646e-01, -8.7963e-02, -1.0000e+09, -9.0318e-02,
        -4.1190e-01, -1.0000e+09, -3.0097e-01, -1.9913e-01, -2.9776e-01,
        -2.6534e-01, -2.9669e-01, -9.9006e-02, -1.5565e-01, -2.8116e-01,
        -1.0000e+09, -1.0184e-02, -3.7285e-01, -1.0000e+09, -1.0184e-02,
        -3.7285e-01, -1.0000e+09, -1.0184e-02, -3.7285e-01, -1.0000e+09,
        -1.0184e-02, -1.0184e-02, -3.7285e-01, -1.0184e-02, -3.7285e-01,
        -1.0000e+09, -1.0184e-02

Tokens embeddings:
tensor([-1.0233e-02, -4.1190e-01, -1.0000e+09, -2.8000e-01, -4.7436e-01,
        -1.0000e+09, -3.5814e-01, -4.0058e-01, -1.0000e+09, -9.0318e-02,
        -4.1190e-01, -1.0000e+09, -2.5068e-01, -3.9798e-01, -1.0000e+09,
        -1.6423e-01, -3.7905e-01, -1.0000e+09, -2.7793e-01, -4.5358e-01,
        -1.0000e+09, -9.0318e-02, -4.9015e-01, -1.0000e+09, -2.8299e-01,
        -4.3400e-01, -1.0000e+09, -2.9826e-01, -1.8695e-01, -1.0000e+09,
        -3.5120e-01, -4.3088e-01, -1.0000e+09, -2.2084e-01, -2.4775e-01,
        -1.0000e+09, -2.5646e-01, -8.7963e-02, -1.0000e+09, -9.0318e-02,
        -4.1190e-01, -1.0000e+09, -3.0097e-01, -1.9913e-01, -2.9776e-01,
        -2.6534e-01, -2.9669e-01, -9.9006e-02, -1.5565e-01, -2.8116e-01,
        -1.0000e+09, -1.0184e-02, -3.7285e-01, -1.0000e+09, -1.0184e-02,
        -3.7285e-01, -1.0000e+09, -1.0184e-02, -3.7285e-01, -1.0000e+09,
        -1.0184e-02, -1.0184e-02, -3.7285e-01, -1.0184e-02, -3.7285e-01,
        -1.0000e+09, -1.0184e-02

Tokens embeddings:
tensor([-1.0233e-02, -4.1190e-01, -1.0000e+09, -2.8000e-01, -4.7436e-01,
        -1.0000e+09, -3.5814e-01, -4.0058e-01, -1.0000e+09, -9.0318e-02,
        -4.1190e-01, -1.0000e+09, -2.5068e-01, -3.9798e-01, -1.0000e+09,
        -1.6423e-01, -3.7905e-01, -1.0000e+09, -2.7793e-01, -4.5358e-01,
        -1.0000e+09, -9.0318e-02, -4.9015e-01, -1.0000e+09, -2.8299e-01,
        -4.3400e-01, -1.0000e+09, -2.9826e-01, -1.8695e-01, -1.0000e+09,
        -3.5120e-01, -4.3088e-01, -1.0000e+09, -2.2084e-01, -2.4775e-01,
        -1.0000e+09, -2.5646e-01, -8.7963e-02, -1.0000e+09, -9.0318e-02,
        -4.1190e-01, -1.0000e+09, -3.0097e-01, -1.9913e-01, -2.9776e-01,
        -2.6534e-01, -2.9669e-01, -9.9006e-02, -1.5565e-01, -2.8116e-01,
        -1.0000e+09, -1.0184e-02, -3.7285e-01, -1.0000e+09, -1.0184e-02,
        -3.7285e-01, -1.0000e+09, -1.0184e-02, -3.7285e-01, -1.0000e+09,
        -1.0184e-02, -1.0184e-02, -3.7285e-01, -1.0184e-02, -3.7285e-01,
        -1.0000e+09, -1.0184e-02

Tokens embeddings:
tensor([-1.0233e-02, -4.1190e-01, -1.0000e+09, -2.8000e-01, -4.7436e-01,
        -1.0000e+09, -3.5814e-01, -4.0058e-01, -1.0000e+09, -9.0318e-02,
        -4.1190e-01, -1.0000e+09, -2.5068e-01, -3.9798e-01, -1.0000e+09,
        -1.6423e-01, -3.7905e-01, -1.0000e+09, -2.7793e-01, -4.5358e-01,
        -1.0000e+09, -9.0318e-02, -4.9015e-01, -1.0000e+09, -2.8299e-01,
        -4.3400e-01, -1.0000e+09, -2.9826e-01, -1.8695e-01, -1.0000e+09,
        -3.5120e-01, -4.3088e-01, -1.0000e+09, -2.2084e-01, -2.4775e-01,
        -1.0000e+09, -2.5646e-01, -8.7963e-02, -1.0000e+09, -9.0318e-02,
        -4.1190e-01, -1.0000e+09, -3.0097e-01, -1.9913e-01, -2.9776e-01,
        -2.6534e-01, -2.9669e-01, -9.9006e-02, -1.5565e-01, -2.8116e-01,
        -1.0000e+09, -1.0184e-02, -3.7285e-01, -1.0000e+09, -1.0184e-02,
        -3.7285e-01, -1.0000e+09, -1.0184e-02, -3.7285e-01, -1.0000e+09,
        -1.0184e-02, -1.0184e-02, -3.7285e-01, -1.0184e-02, -3.7285e-01,
        -1.0000e+09, -1.0184e-02

Tokens embeddings:
tensor([-1.0233e-02, -2.9776e-01, -1.0000e+09, -3.1027e-01, -3.4286e-01,
        -1.0000e+09, -3.1391e-01, -4.1190e-01, -1.0000e+09, -2.9492e-01,
        -2.7229e-01, -1.0000e+09, -3.1090e-01, -3.1394e-01, -3.1947e-01,
        -2.9975e-01, -1.3248e-01, -2.5114e-01, -1.6655e-01, -2.0984e-01,
        -1.0000e+09, -2.8151e-01, -4.0635e-01, -1.0000e+09, -3.5217e-01,
        -3.4879e-01, -1.0000e+09, -3.1066e-01, -1.4116e-01, -1.0000e+09,
        -2.7190e-01, -3.0494e-01, -1.0000e+09,  1.5462e-02, -3.7285e-01,
        -1.0000e+09, -1.0184e-02, -3.7285e-01, -1.0000e+09, -1.0184e-02,
        -3.7285e-01, -1.0000e+09, -1.0184e-02, -3.7285e-01, -1.0000e+09,
        -1.0184e-02, -3.7285e-01, -1.0000e+09, -1.0184e-02, -3.7285e-01,
        -1.0000e+09, -1.0184e-02, -3.7285e-01, -1.0000e+09, -1.0184e-02,
        -3.7285e-01, -1.0000e+09, -1.0184e-02, -3.7285e-01, -1.0000e+09,
        -1.0184e-02, -3.7285e-01, -1.0000e+09, -1.0184e-02, -3.7285e-01,
        -1.0000e+09, -1.0184e-02

Tokens embeddings:
tensor([-1.0233e-02, -2.9776e-01, -1.0000e+09, -3.1027e-01, -3.4286e-01,
        -1.0000e+09, -3.1391e-01, -4.1190e-01, -1.0000e+09, -2.9492e-01,
        -2.7229e-01, -1.0000e+09, -3.1090e-01, -3.1394e-01, -3.1947e-01,
        -2.9975e-01, -1.3248e-01, -2.5114e-01, -1.6655e-01, -2.0984e-01,
        -1.0000e+09, -2.8151e-01, -4.0635e-01, -1.0000e+09, -3.5217e-01,
        -3.4879e-01, -1.0000e+09, -3.1066e-01, -1.4116e-01, -1.0000e+09,
        -2.7190e-01, -3.0494e-01, -1.0000e+09,  1.5462e-02, -3.7285e-01,
        -1.0000e+09, -1.0184e-02, -3.7285e-01, -1.0000e+09, -1.0184e-02,
        -3.7285e-01, -1.0000e+09, -1.0184e-02, -3.7285e-01, -1.0000e+09,
        -1.0184e-02, -3.7285e-01, -1.0000e+09, -1.0184e-02, -3.7285e-01,
        -1.0000e+09, -1.0184e-02, -3.7285e-01, -1.0000e+09, -1.0184e-02,
        -3.7285e-01, -1.0000e+09, -1.0184e-02, -3.7285e-01, -1.0000e+09,
        -1.0184e-02, -3.7285e-01, -1.0000e+09, -1.0184e-02, -3.7285e-01,
        -1.0000e+09, -1.0184e-02

Tokens embeddings:
tensor([-1.0233e-02, -2.9776e-01, -1.0000e+09, -3.1027e-01, -3.4286e-01,
        -1.0000e+09, -3.1391e-01, -4.1190e-01, -1.0000e+09, -2.9492e-01,
        -2.7229e-01, -1.0000e+09, -3.1090e-01, -3.1394e-01, -3.1947e-01,
        -2.9975e-01, -1.3248e-01, -2.5114e-01, -1.6655e-01, -2.0984e-01,
        -1.0000e+09, -2.8151e-01, -4.0635e-01, -1.0000e+09, -3.5217e-01,
        -3.4879e-01, -1.0000e+09, -3.1066e-01, -1.4116e-01, -1.0000e+09,
        -2.7190e-01, -3.0494e-01, -1.0000e+09,  1.5462e-02, -3.7285e-01,
        -1.0000e+09, -1.0184e-02, -3.7285e-01, -1.0000e+09, -1.0184e-02,
        -3.7285e-01, -1.0000e+09, -1.0184e-02, -3.7285e-01, -1.0000e+09,
        -1.0184e-02, -3.7285e-01, -1.0000e+09, -1.0184e-02, -3.7285e-01,
        -1.0000e+09, -1.0184e-02, -3.7285e-01, -1.0000e+09, -1.0184e-02,
        -3.7285e-01, -1.0000e+09, -1.0184e-02, -3.7285e-01, -1.0000e+09,
        -1.0184e-02, -3.7285e-01, -1.0000e+09, -1.0184e-02, -3.7285e-01,
        -1.0000e+09, -1.0184e-02

Tokens embeddings:
tensor([-1.0233e-02, -2.9776e-01, -1.0000e+09, -3.1027e-01, -3.4286e-01,
        -1.0000e+09, -3.1391e-01, -4.1190e-01, -1.0000e+09, -2.9492e-01,
        -2.7229e-01, -1.0000e+09, -3.1090e-01, -3.1394e-01, -3.1947e-01,
        -2.9975e-01, -1.3248e-01, -2.5114e-01, -1.6655e-01, -2.0984e-01,
        -1.0000e+09, -2.8151e-01, -4.0635e-01, -1.0000e+09, -3.5217e-01,
        -3.4879e-01, -1.0000e+09, -3.1066e-01, -1.4116e-01, -1.0000e+09,
        -2.7190e-01, -3.0494e-01, -1.0000e+09,  1.5462e-02, -3.7285e-01,
        -1.0000e+09, -1.0184e-02, -3.7285e-01, -1.0000e+09, -1.0184e-02,
        -3.7285e-01, -1.0000e+09, -1.0184e-02, -3.7285e-01, -1.0000e+09,
        -1.0184e-02, -3.7285e-01, -1.0000e+09, -1.0184e-02, -3.7285e-01,
        -1.0000e+09, -1.0184e-02, -3.7285e-01, -1.0000e+09, -1.0184e-02,
        -3.7285e-01, -1.0000e+09, -1.0184e-02, -3.7285e-01, -1.0000e+09,
        -1.0184e-02, -3.7285e-01, -1.0000e+09, -1.0184e-02, -3.7285e-01,
        -1.0000e+09, -1.0184e-02

Tokens embeddings:
tensor([-1.0233e-02, -2.9776e-01, -1.0000e+09, -3.1027e-01, -3.4286e-01,
        -1.0000e+09, -3.1391e-01, -4.1190e-01, -1.0000e+09, -2.9492e-01,
        -2.7229e-01, -1.0000e+09, -3.1090e-01, -3.1394e-01, -3.1947e-01,
        -2.9975e-01, -1.3248e-01, -2.5114e-01, -1.6655e-01, -2.0984e-01,
        -1.0000e+09, -2.8151e-01, -4.0635e-01, -1.0000e+09, -3.5217e-01,
        -3.4879e-01, -1.0000e+09, -3.1066e-01, -1.4116e-01, -1.0000e+09,
        -2.7190e-01, -3.0494e-01, -1.0000e+09,  1.5462e-02, -3.7285e-01,
        -1.0000e+09, -1.0184e-02, -3.7285e-01, -1.0000e+09, -1.0184e-02,
        -3.7285e-01, -1.0000e+09, -1.0184e-02, -3.7285e-01, -1.0000e+09,
        -1.0184e-02, -3.7285e-01, -1.0000e+09, -1.0184e-02, -3.7285e-01,
        -1.0000e+09, -1.0184e-02, -3.7285e-01, -1.0000e+09, -1.0184e-02,
        -3.7285e-01, -1.0000e+09, -1.0184e-02, -3.7285e-01, -1.0000e+09,
        -1.0184e-02, -3.7285e-01, -1.0000e+09, -1.0184e-02, -3.7285e-01,
        -1.0000e+09, -1.0184e-02

Tokens embeddings:
tensor([-1.0233e-02, -2.9776e-01, -1.0000e+09, -3.1027e-01, -3.4286e-01,
        -1.0000e+09, -3.1391e-01, -4.1190e-01, -1.0000e+09, -2.9492e-01,
        -2.7229e-01, -1.0000e+09, -3.1090e-01, -3.1394e-01, -3.1947e-01,
        -2.9975e-01, -1.3248e-01, -2.5114e-01, -1.6655e-01, -2.0984e-01,
        -1.0000e+09, -2.8151e-01, -4.0635e-01, -1.0000e+09, -3.5217e-01,
        -3.4879e-01, -1.0000e+09, -3.1066e-01, -1.4116e-01, -1.0000e+09,
        -2.7190e-01, -3.0494e-01, -1.0000e+09,  1.5462e-02, -3.7285e-01,
        -1.0000e+09, -1.0184e-02, -3.7285e-01, -1.0000e+09, -1.0184e-02,
        -3.7285e-01, -1.0000e+09, -1.0184e-02, -3.7285e-01, -1.0000e+09,
        -1.0184e-02, -3.7285e-01, -1.0000e+09, -1.0184e-02, -3.7285e-01,
        -1.0000e+09, -1.0184e-02, -3.7285e-01, -1.0000e+09, -1.0184e-02,
        -3.7285e-01, -1.0000e+09, -1.0184e-02, -3.7285e-01, -1.0000e+09,
        -1.0184e-02, -3.7285e-01, -1.0000e+09, -1.0184e-02, -3.7285e-01,
        -1.0000e+09, -1.0184e-02

Tokens embeddings:
tensor([-1.0233e-02, -2.9776e-01, -1.0000e+09, -3.1027e-01, -3.4286e-01,
        -1.0000e+09, -3.1391e-01, -4.1190e-01, -1.0000e+09, -2.9492e-01,
        -2.7229e-01, -1.0000e+09, -3.1090e-01, -3.1394e-01, -3.1947e-01,
        -2.9975e-01, -1.3248e-01, -2.5114e-01, -1.6655e-01, -2.0984e-01,
        -1.0000e+09, -2.8151e-01, -4.0635e-01, -1.0000e+09, -3.5217e-01,
        -3.4879e-01, -1.0000e+09, -3.1066e-01, -1.4116e-01, -1.0000e+09,
        -2.7190e-01, -3.0494e-01, -1.0000e+09,  1.5462e-02, -3.7285e-01,
        -1.0000e+09, -1.0184e-02, -3.7285e-01, -1.0000e+09, -1.0184e-02,
        -3.7285e-01, -1.0000e+09, -1.0184e-02, -3.7285e-01, -1.0000e+09,
        -1.0184e-02, -3.7285e-01, -1.0000e+09, -1.0184e-02, -3.7285e-01,
        -1.0000e+09, -1.0184e-02, -3.7285e-01, -1.0000e+09, -1.0184e-02,
        -3.7285e-01, -1.0000e+09, -1.0184e-02, -3.7285e-01, -1.0000e+09,
        -1.0184e-02, -3.7285e-01, -1.0000e+09, -1.0184e-02, -3.7285e-01,
        -1.0000e+09, -1.0184e-02

Tokens embeddings:
tensor([-1.0233e-02, -2.9776e-01, -1.0000e+09, -3.1027e-01, -3.4286e-01,
        -1.0000e+09, -3.1391e-01, -4.1190e-01, -1.0000e+09, -2.9492e-01,
        -2.7229e-01, -1.0000e+09, -3.1090e-01, -3.1394e-01, -3.1947e-01,
        -2.9975e-01, -1.3248e-01, -2.5114e-01, -1.6655e-01, -2.0984e-01,
        -1.0000e+09, -2.8151e-01, -4.0635e-01, -1.0000e+09, -3.5217e-01,
        -3.4879e-01, -1.0000e+09, -3.1066e-01, -1.4116e-01, -1.0000e+09,
        -2.7190e-01, -3.0494e-01, -1.0000e+09,  1.5462e-02, -3.7285e-01,
        -1.0000e+09, -1.0184e-02, -3.7285e-01, -1.0000e+09, -1.0184e-02,
        -3.7285e-01, -1.0000e+09, -1.0184e-02, -3.7285e-01, -1.0000e+09,
        -1.0184e-02, -3.7285e-01, -1.0000e+09, -1.0184e-02, -3.7285e-01,
        -1.0000e+09, -1.0184e-02, -3.7285e-01, -1.0000e+09, -1.0184e-02,
        -3.7285e-01, -1.0000e+09, -1.0184e-02, -3.7285e-01, -1.0000e+09,
        -1.0184e-02, -3.7285e-01, -1.0000e+09, -1.0184e-02, -3.7285e-01,
        -1.0000e+09, -1.0184e-02

Tokens embeddings:
tensor([-1.0233e-02, -2.9776e-01, -1.0000e+09, -3.1027e-01, -3.4286e-01,
        -1.0000e+09, -3.1391e-01, -4.1190e-01, -1.0000e+09, -2.9492e-01,
        -2.7229e-01, -1.0000e+09, -3.1090e-01, -3.1394e-01, -3.1947e-01,
        -2.9975e-01, -1.3248e-01, -2.5114e-01, -1.6655e-01, -2.0984e-01,
        -1.0000e+09, -2.8151e-01, -4.0635e-01, -1.0000e+09, -3.5217e-01,
        -3.4879e-01, -1.0000e+09, -3.1066e-01, -1.4116e-01, -1.0000e+09,
        -2.7190e-01, -3.0494e-01, -1.0000e+09,  1.5462e-02, -3.7285e-01,
        -1.0000e+09, -1.0184e-02, -3.7285e-01, -1.0000e+09, -1.0184e-02,
        -3.7285e-01, -1.0000e+09, -1.0184e-02, -3.7285e-01, -1.0000e+09,
        -1.0184e-02, -3.7285e-01, -1.0000e+09, -1.0184e-02, -3.7285e-01,
        -1.0000e+09, -1.0184e-02, -3.7285e-01, -1.0000e+09, -1.0184e-02,
        -3.7285e-01, -1.0000e+09, -1.0184e-02, -3.7285e-01, -1.0000e+09,
        -1.0184e-02, -3.7285e-01, -1.0000e+09, -1.0184e-02, -3.7285e-01,
        -1.0000e+09, -1.0184e-02

Tokens embeddings:
tensor([-1.0233e-02, -2.9776e-01, -1.0000e+09, -3.1027e-01, -3.4286e-01,
        -1.0000e+09, -3.1391e-01, -4.1190e-01, -1.0000e+09, -2.9492e-01,
        -2.7229e-01, -1.0000e+09, -3.1090e-01, -3.1394e-01, -3.1947e-01,
        -2.9975e-01, -1.3248e-01, -2.5114e-01, -1.6655e-01, -2.0984e-01,
        -1.0000e+09, -2.8151e-01, -4.0635e-01, -1.0000e+09, -3.5217e-01,
        -3.4879e-01, -1.0000e+09, -3.1066e-01, -1.4116e-01, -1.0000e+09,
        -2.7190e-01, -3.0494e-01, -1.0000e+09,  1.5462e-02, -3.7285e-01,
        -1.0000e+09, -1.0184e-02, -3.7285e-01, -1.0000e+09, -1.0184e-02,
        -3.7285e-01, -1.0000e+09, -1.0184e-02, -3.7285e-01, -1.0000e+09,
        -1.0184e-02, -3.7285e-01, -1.0000e+09, -1.0184e-02, -3.7285e-01,
        -1.0000e+09, -1.0184e-02, -3.7285e-01, -1.0000e+09, -1.0184e-02,
        -3.7285e-01, -1.0000e+09, -1.0184e-02, -3.7285e-01, -1.0000e+09,
        -1.0184e-02, -3.7285e-01, -1.0000e+09, -1.0184e-02, -3.7285e-01,
        -1.0000e+09, -1.0184e-02

Tokens embeddings:
tensor([-1.0233e-02, -2.9776e-01, -1.0000e+09, -3.1027e-01, -3.4286e-01,
        -1.0000e+09, -3.1391e-01, -4.1190e-01, -1.0000e+09, -2.9492e-01,
        -2.7229e-01, -1.0000e+09, -3.1090e-01, -3.1394e-01, -3.1947e-01,
        -2.9975e-01, -1.3248e-01, -2.5114e-01, -1.6655e-01, -2.0984e-01,
        -1.0000e+09, -2.8151e-01, -4.0635e-01, -1.0000e+09, -3.5217e-01,
        -3.4879e-01, -1.0000e+09, -3.1066e-01, -1.4116e-01, -1.0000e+09,
        -2.7190e-01, -3.0494e-01, -1.0000e+09,  1.5462e-02, -3.7285e-01,
        -1.0000e+09, -1.0184e-02, -3.7285e-01, -1.0000e+09, -1.0184e-02,
        -3.7285e-01, -1.0000e+09, -1.0184e-02, -3.7285e-01, -1.0000e+09,
        -1.0184e-02, -3.7285e-01, -1.0000e+09, -1.0184e-02, -3.7285e-01,
        -1.0000e+09, -1.0184e-02, -3.7285e-01, -1.0000e+09, -1.0184e-02,
        -3.7285e-01, -1.0000e+09, -1.0184e-02, -3.7285e-01, -1.0000e+09,
        -1.0184e-02, -3.7285e-01, -1.0000e+09, -1.0184e-02, -3.7285e-01,
        -1.0000e+09, -1.0184e-02

Tokens embeddings:
tensor([-1.0233e-02, -2.9776e-01, -1.0000e+09, -3.1027e-01, -3.4286e-01,
        -1.0000e+09, -3.1391e-01, -4.1190e-01, -1.0000e+09, -2.9492e-01,
        -2.7229e-01, -1.0000e+09, -3.1090e-01, -3.1394e-01, -3.1947e-01,
        -2.9975e-01, -1.3248e-01, -2.5114e-01, -1.6655e-01, -2.0984e-01,
        -1.0000e+09, -2.8151e-01, -4.0635e-01, -1.0000e+09, -3.5217e-01,
        -3.4879e-01, -1.0000e+09, -3.1066e-01, -1.4116e-01, -1.0000e+09,
        -2.7190e-01, -3.0494e-01, -1.0000e+09,  1.5462e-02, -3.7285e-01,
        -1.0000e+09, -1.0184e-02, -3.7285e-01, -1.0000e+09, -1.0184e-02,
        -3.7285e-01, -1.0000e+09, -1.0184e-02, -3.7285e-01, -1.0000e+09,
        -1.0184e-02, -3.7285e-01, -1.0000e+09, -1.0184e-02, -3.7285e-01,
        -1.0000e+09, -1.0184e-02, -3.7285e-01, -1.0000e+09, -1.0184e-02,
        -3.7285e-01, -1.0000e+09, -1.0184e-02, -3.7285e-01, -1.0000e+09,
        -1.0184e-02, -3.7285e-01, -1.0000e+09, -1.0184e-02, -3.7285e-01,
        -1.0000e+09, -1.0184e-02

Tokens embeddings:
tensor([-0.0102, -0.1557, -0.1557, -0.1733, -0.3224, -0.2731, -0.1723, -0.2141,
        -0.0719, -0.3917, -0.0310, -0.3286, -0.2428, -0.1557, -0.1759, -0.3281,
        -0.2050,  0.0443, -0.1557,  0.0155, -0.0102, -0.0102, -0.0102, -0.0102,
        -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102,
        -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102,
        -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102,
        -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102,
        -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102,
        -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102,
        -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102,
        -0.0102, -0.0102, -0.0102, -0.0102, -0.0102])
arg max of tensor([-0.0102, -0.1557, -0.1557, -0.1733, -0.3224, -0.2731, -0.1723, -0.2141,
        -0.0719, -0.3917, -0.0310, -

Tokens embeddings:
tensor([-0.0102, -0.1557, -0.1557, -0.1733, -0.3224, -0.2731, -0.1723, -0.2141,
        -0.0719, -0.3917, -0.0310, -0.3286, -0.2428, -0.1557, -0.1759, -0.3281,
        -0.2050,  0.0443, -0.1557,  0.0155, -0.0102, -0.0102, -0.0102, -0.0102,
        -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102,
        -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102,
        -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102,
        -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102,
        -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102,
        -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102,
        -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102,
        -0.0102, -0.0102, -0.0102, -0.0102, -0.0102])
arg max of tensor([-0.0102, -0.1557, -0.1557, -0.1733, -0.3224, -0.2731, -0.1723, -0.2141,
        -0.0719, -0.3917, -0.0310, -

Tokens embeddings:
tensor([-0.0102, -0.1557, -0.1557, -0.1733, -0.3224, -0.2731, -0.1723, -0.2141,
        -0.0719, -0.3917, -0.0310, -0.3286, -0.2428, -0.1557, -0.1759, -0.3281,
        -0.2050,  0.0443, -0.1557,  0.0155, -0.0102, -0.0102, -0.0102, -0.0102,
        -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102,
        -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102,
        -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102,
        -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102,
        -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102,
        -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102,
        -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102,
        -0.0102, -0.0102, -0.0102, -0.0102, -0.0102])
arg max of tensor([-0.0102, -0.1557, -0.1557, -0.1733, -0.3224, -0.2731, -0.1723, -0.2141,
        -0.0719, -0.3917, -0.0310, -

Tokens embeddings:
tensor([-0.0102, -0.1557, -0.1557, -0.1733, -0.3224, -0.2731, -0.1723, -0.2141,
        -0.0719, -0.3917, -0.0310, -0.3286, -0.2428, -0.1557, -0.1759, -0.3281,
        -0.2050,  0.0443, -0.1557,  0.0155, -0.0102, -0.0102, -0.0102, -0.0102,
        -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102,
        -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102,
        -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102,
        -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102,
        -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102,
        -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102,
        -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102,
        -0.0102, -0.0102, -0.0102, -0.0102, -0.0102])
arg max of tensor([-0.0102, -0.1557, -0.1557, -0.1733, -0.3224, -0.2731, -0.1723, -0.2141,
        -0.0719, -0.3917, -0.0310, -

Tokens embeddings:
tensor([-0.0102, -0.1557, -0.1557, -0.1733, -0.3224, -0.2731, -0.1723, -0.2141,
        -0.0719, -0.3917, -0.0310, -0.3286, -0.2428, -0.1557, -0.1759, -0.3281,
        -0.2050,  0.0443, -0.1557,  0.0155, -0.0102, -0.0102, -0.0102, -0.0102,
        -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102,
        -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102,
        -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102,
        -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102,
        -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102,
        -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102,
        -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102,
        -0.0102, -0.0102, -0.0102, -0.0102, -0.0102])
arg max of tensor([-0.0102, -0.1557, -0.1557, -0.1733, -0.3224, -0.2731, -0.1723, -0.2141,
        -0.0719, -0.3917, -0.0310, -

Tokens embeddings:
tensor([-0.0102, -0.1557, -0.1557, -0.1733, -0.3224, -0.2731, -0.1723, -0.2141,
        -0.0719, -0.3917, -0.0310, -0.3286, -0.2428, -0.1557, -0.1759, -0.3281,
        -0.2050,  0.0443, -0.1557,  0.0155, -0.0102, -0.0102, -0.0102, -0.0102,
        -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102,
        -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102,
        -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102,
        -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102,
        -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102,
        -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102,
        -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102,
        -0.0102, -0.0102, -0.0102, -0.0102, -0.0102])
arg max of tensor([-0.0102, -0.1557, -0.1557, -0.1733, -0.3224, -0.2731, -0.1723, -0.2141,
        -0.0719, -0.3917, -0.0310, -

Tokens embeddings:
tensor([-0.0102, -0.1557, -0.1557, -0.1733, -0.3224, -0.2731, -0.1723, -0.2141,
        -0.0719, -0.3917, -0.0310, -0.3286, -0.2428, -0.1557, -0.1759, -0.3281,
        -0.2050,  0.0443, -0.1557,  0.0155, -0.0102, -0.0102, -0.0102, -0.0102,
        -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102,
        -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102,
        -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102,
        -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102,
        -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102,
        -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102,
        -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102,
        -0.0102, -0.0102, -0.0102, -0.0102, -0.0102])
arg max of tensor([-0.0102, -0.1557, -0.1557, -0.1733, -0.3224, -0.2731, -0.1723, -0.2141,
        -0.0719, -0.3917, -0.0310, -

Tokens embeddings:
tensor([-0.0102, -0.1557, -0.1557, -0.1733, -0.3224, -0.2731, -0.1723, -0.2141,
        -0.0719, -0.3917, -0.0310, -0.3286, -0.2428, -0.1557, -0.1759, -0.3281,
        -0.2050,  0.0443, -0.1557,  0.0155, -0.0102, -0.0102, -0.0102, -0.0102,
        -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102,
        -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102,
        -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102,
        -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102,
        -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102,
        -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102,
        -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102,
        -0.0102, -0.0102, -0.0102, -0.0102, -0.0102])
arg max of tensor([-0.0102, -0.1557, -0.1557, -0.1733, -0.3224, -0.2731, -0.1723, -0.2141,
        -0.0719, -0.3917, -0.0310, -

Tokens embeddings:
tensor([-0.0102, -0.1557, -0.1557, -0.1733, -0.3224, -0.2731, -0.1723, -0.2141,
        -0.0719, -0.3917, -0.0310, -0.3286, -0.2428, -0.1557, -0.1759, -0.3281,
        -0.2050,  0.0443, -0.1557,  0.0155, -0.0102, -0.0102, -0.0102, -0.0102,
        -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102,
        -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102,
        -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102,
        -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102,
        -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102,
        -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102,
        -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102,
        -0.0102, -0.0102, -0.0102, -0.0102, -0.0102])
arg max of tensor([-0.0102, -0.1557, -0.1557, -0.1733, -0.3224, -0.2731, -0.1723, -0.2141,
        -0.0719, -0.3917, -0.0310, -

Tokens embeddings:
tensor([-0.0102, -0.1557, -0.1557, -0.1733, -0.3224, -0.2731, -0.1723, -0.2141,
        -0.0719, -0.3917, -0.0310, -0.3286, -0.2428, -0.1557, -0.1759, -0.3281,
        -0.2050,  0.0443, -0.1557,  0.0155, -0.0102, -0.0102, -0.0102, -0.0102,
        -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102,
        -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102,
        -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102,
        -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102,
        -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102,
        -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102,
        -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102,
        -0.0102, -0.0102, -0.0102, -0.0102, -0.0102])
arg max of tensor([-0.0102, -0.1557, -0.1557, -0.1733, -0.3224, -0.2731, -0.1723, -0.2141,
        -0.0719, -0.3917, -0.0310, -

Tokens embeddings:
tensor([-0.0102, -0.1557, -0.1557, -0.1733, -0.3224, -0.2731, -0.1723, -0.2141,
        -0.0719, -0.3917, -0.0310, -0.3286, -0.2428, -0.1557, -0.1759, -0.3281,
        -0.2050,  0.0443, -0.1557,  0.0155, -0.0102, -0.0102, -0.0102, -0.0102,
        -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102,
        -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102,
        -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102,
        -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102,
        -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102,
        -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102,
        -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102,
        -0.0102, -0.0102, -0.0102, -0.0102, -0.0102])
arg max of tensor([-0.0102, -0.1557, -0.1557, -0.1733, -0.3224, -0.2731, -0.1723, -0.2141,
        -0.0719, -0.3917, -0.0310, -

Tokens embeddings:
tensor([-0.0102, -0.1557, -0.1557, -0.1733, -0.3224, -0.2731, -0.1723, -0.2141,
        -0.0719, -0.3917, -0.0310, -0.3286, -0.2428, -0.1557, -0.1759, -0.3281,
        -0.2050,  0.0443, -0.1557,  0.0155, -0.0102, -0.0102, -0.0102, -0.0102,
        -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102,
        -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102,
        -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102,
        -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102,
        -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102,
        -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102,
        -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102,
        -0.0102, -0.0102, -0.0102, -0.0102, -0.0102])
arg max of tensor([-0.0102, -0.1557, -0.1557, -0.1733, -0.3224, -0.2731, -0.1723, -0.2141,
        -0.0719, -0.3917, -0.0310, -

Tokens embeddings:
tensor([-1.0233e-02, -2.9776e-01, -1.0000e+09, -2.8116e-01, -4.7238e-01,
        -1.0000e+09, -9.0318e-02, -2.5809e-01, -1.0000e+09, -1.3248e-01,
        -3.6371e-01, -1.0000e+09, -2.5532e-01, -2.7229e-01, -1.0000e+09,
        -2.6416e-01, -4.7442e-01, -1.0000e+09, -2.8151e-01, -4.5808e-01,
        -1.0000e+09, -1.2304e-01, -4.3775e-01, -1.0000e+09, -2.5571e-01,
        -2.0016e-01, -1.0000e+09, -9.0318e-02, -5.0984e-01, -1.0000e+09,
        -3.1615e-01, -3.7307e-01, -1.0000e+09, -2.7131e-01, -4.9470e-01,
        -1.0000e+09, -1.5336e-01, -2.4753e-01, -1.0000e+09, -2.8299e-01,
        -5.2124e-01, -1.0000e+09, -1.6590e-01, -4.2241e-01, -1.0000e+09,
        -4.8486e-02, -2.2133e-01, -1.0000e+09, -2.9970e-01, -4.7241e-01,
        -1.0000e+09, -4.9477e-02, -3.7405e-01, -1.0000e+09, -3.3221e-01,
        -2.7229e-01, -1.0000e+09, -3.7490e-01, -4.1085e-01, -1.0000e+09,
        -2.8151e-01, -3.1405e-01, -1.0000e+09, -1.1230e-01, -1.5565e-01,
        -1.5565e-01, -2.7175e-01

Tokens embeddings:
tensor([-1.0233e-02, -2.9776e-01, -1.0000e+09, -2.8116e-01, -4.7238e-01,
        -1.0000e+09, -9.0318e-02, -2.5809e-01, -1.0000e+09, -1.3248e-01,
        -3.6371e-01, -1.0000e+09, -2.5532e-01, -2.7229e-01, -1.0000e+09,
        -2.6416e-01, -4.7442e-01, -1.0000e+09, -2.8151e-01, -4.5808e-01,
        -1.0000e+09, -1.2304e-01, -4.3775e-01, -1.0000e+09, -2.5571e-01,
        -2.0016e-01, -1.0000e+09, -9.0318e-02, -5.0984e-01, -1.0000e+09,
        -3.1615e-01, -3.7307e-01, -1.0000e+09, -2.7131e-01, -4.9470e-01,
        -1.0000e+09, -1.5336e-01, -2.4753e-01, -1.0000e+09, -2.8299e-01,
        -5.2124e-01, -1.0000e+09, -1.6590e-01, -4.2241e-01, -1.0000e+09,
        -4.8486e-02, -2.2133e-01, -1.0000e+09, -2.9970e-01, -4.7241e-01,
        -1.0000e+09, -4.9477e-02, -3.7405e-01, -1.0000e+09, -3.3221e-01,
        -2.7229e-01, -1.0000e+09, -3.7490e-01, -4.1085e-01, -1.0000e+09,
        -2.8151e-01, -3.1405e-01, -1.0000e+09, -1.1230e-01, -1.5565e-01,
        -1.5565e-01, -2.7175e-01

Tokens embeddings:
tensor([-1.0233e-02, -2.9776e-01, -1.0000e+09, -2.8116e-01, -4.7238e-01,
        -1.0000e+09, -9.0318e-02, -2.5809e-01, -1.0000e+09, -1.3248e-01,
        -3.6371e-01, -1.0000e+09, -2.5532e-01, -2.7229e-01, -1.0000e+09,
        -2.6416e-01, -4.7442e-01, -1.0000e+09, -2.8151e-01, -4.5808e-01,
        -1.0000e+09, -1.2304e-01, -4.3775e-01, -1.0000e+09, -2.5571e-01,
        -2.0016e-01, -1.0000e+09, -9.0318e-02, -5.0984e-01, -1.0000e+09,
        -3.1615e-01, -3.7307e-01, -1.0000e+09, -2.7131e-01, -4.9470e-01,
        -1.0000e+09, -1.5336e-01, -2.4753e-01, -1.0000e+09, -2.8299e-01,
        -5.2124e-01, -1.0000e+09, -1.6590e-01, -4.2241e-01, -1.0000e+09,
        -4.8486e-02, -2.2133e-01, -1.0000e+09, -2.9970e-01, -4.7241e-01,
        -1.0000e+09, -4.9477e-02, -3.7405e-01, -1.0000e+09, -3.3221e-01,
        -2.7229e-01, -1.0000e+09, -3.7490e-01, -4.1085e-01, -1.0000e+09,
        -2.8151e-01, -3.1405e-01, -1.0000e+09, -1.1230e-01, -1.5565e-01,
        -1.5565e-01, -2.7175e-01

Tokens embeddings:
tensor([-1.0233e-02, -2.9776e-01, -1.0000e+09, -2.8116e-01, -4.7238e-01,
        -1.0000e+09, -9.0318e-02, -2.5809e-01, -1.0000e+09, -1.3248e-01,
        -3.6371e-01, -1.0000e+09, -2.5532e-01, -2.7229e-01, -1.0000e+09,
        -2.6416e-01, -4.7442e-01, -1.0000e+09, -2.8151e-01, -4.5808e-01,
        -1.0000e+09, -1.2304e-01, -4.3775e-01, -1.0000e+09, -2.5571e-01,
        -2.0016e-01, -1.0000e+09, -9.0318e-02, -5.0984e-01, -1.0000e+09,
        -3.1615e-01, -3.7307e-01, -1.0000e+09, -2.7131e-01, -4.9470e-01,
        -1.0000e+09, -1.5336e-01, -2.4753e-01, -1.0000e+09, -2.8299e-01,
        -5.2124e-01, -1.0000e+09, -1.6590e-01, -4.2241e-01, -1.0000e+09,
        -4.8486e-02, -2.2133e-01, -1.0000e+09, -2.9970e-01, -4.7241e-01,
        -1.0000e+09, -4.9477e-02, -3.7405e-01, -1.0000e+09, -3.3221e-01,
        -2.7229e-01, -1.0000e+09, -3.7490e-01, -4.1085e-01, -1.0000e+09,
        -2.8151e-01, -3.1405e-01, -1.0000e+09, -1.1230e-01, -1.5565e-01,
        -1.5565e-01, -2.7175e-01

Tokens embeddings:
tensor([-1.0233e-02, -2.9776e-01, -1.0000e+09, -2.8116e-01, -4.7238e-01,
        -1.0000e+09, -9.0318e-02, -2.5809e-01, -1.0000e+09, -1.3248e-01,
        -3.6371e-01, -1.0000e+09, -2.5532e-01, -2.7229e-01, -1.0000e+09,
        -2.6416e-01, -4.7442e-01, -1.0000e+09, -2.8151e-01, -4.5808e-01,
        -1.0000e+09, -1.2304e-01, -4.3775e-01, -1.0000e+09, -2.5571e-01,
        -2.0016e-01, -1.0000e+09, -9.0318e-02, -5.0984e-01, -1.0000e+09,
        -3.1615e-01, -3.7307e-01, -1.0000e+09, -2.7131e-01, -4.9470e-01,
        -1.0000e+09, -1.5336e-01, -2.4753e-01, -1.0000e+09, -2.8299e-01,
        -5.2124e-01, -1.0000e+09, -1.6590e-01, -4.2241e-01, -1.0000e+09,
        -4.8486e-02, -2.2133e-01, -1.0000e+09, -2.9970e-01, -4.7241e-01,
        -1.0000e+09, -4.9477e-02, -3.7405e-01, -1.0000e+09, -3.3221e-01,
        -2.7229e-01, -1.0000e+09, -3.7490e-01, -4.1085e-01, -1.0000e+09,
        -2.8151e-01, -3.1405e-01, -1.0000e+09, -1.1230e-01, -1.5565e-01,
        -1.5565e-01, -2.7175e-01

Tokens embeddings:
tensor([-1.0233e-02, -2.9776e-01, -1.0000e+09, -2.8116e-01, -4.7238e-01,
        -1.0000e+09, -9.0318e-02, -2.5809e-01, -1.0000e+09, -1.3248e-01,
        -3.6371e-01, -1.0000e+09, -2.5532e-01, -2.7229e-01, -1.0000e+09,
        -2.6416e-01, -4.7442e-01, -1.0000e+09, -2.8151e-01, -4.5808e-01,
        -1.0000e+09, -1.2304e-01, -4.3775e-01, -1.0000e+09, -2.5571e-01,
        -2.0016e-01, -1.0000e+09, -9.0318e-02, -5.0984e-01, -1.0000e+09,
        -3.1615e-01, -3.7307e-01, -1.0000e+09, -2.7131e-01, -4.9470e-01,
        -1.0000e+09, -1.5336e-01, -2.4753e-01, -1.0000e+09, -2.8299e-01,
        -5.2124e-01, -1.0000e+09, -1.6590e-01, -4.2241e-01, -1.0000e+09,
        -4.8486e-02, -2.2133e-01, -1.0000e+09, -2.9970e-01, -4.7241e-01,
        -1.0000e+09, -4.9477e-02, -3.7405e-01, -1.0000e+09, -3.3221e-01,
        -2.7229e-01, -1.0000e+09, -3.7490e-01, -4.1085e-01, -1.0000e+09,
        -2.8151e-01, -3.1405e-01, -1.0000e+09, -1.1230e-01, -1.5565e-01,
        -1.5565e-01, -2.7175e-01

Tokens embeddings:
tensor([-1.0233e-02, -2.9776e-01, -1.0000e+09, -2.8116e-01, -4.7238e-01,
        -1.0000e+09, -9.0318e-02, -2.5809e-01, -1.0000e+09, -1.3248e-01,
        -3.6371e-01, -1.0000e+09, -2.5532e-01, -2.7229e-01, -1.0000e+09,
        -2.6416e-01, -4.7442e-01, -1.0000e+09, -2.8151e-01, -4.5808e-01,
        -1.0000e+09, -1.2304e-01, -4.3775e-01, -1.0000e+09, -2.5571e-01,
        -2.0016e-01, -1.0000e+09, -9.0318e-02, -5.0984e-01, -1.0000e+09,
        -3.1615e-01, -3.7307e-01, -1.0000e+09, -2.7131e-01, -4.9470e-01,
        -1.0000e+09, -1.5336e-01, -2.4753e-01, -1.0000e+09, -2.8299e-01,
        -5.2124e-01, -1.0000e+09, -1.6590e-01, -4.2241e-01, -1.0000e+09,
        -4.8486e-02, -2.2133e-01, -1.0000e+09, -2.9970e-01, -4.7241e-01,
        -1.0000e+09, -4.9477e-02, -3.7405e-01, -1.0000e+09, -3.3221e-01,
        -2.7229e-01, -1.0000e+09, -3.7490e-01, -4.1085e-01, -1.0000e+09,
        -2.8151e-01, -3.1405e-01, -1.0000e+09, -1.1230e-01, -1.5565e-01,
        -1.5565e-01, -2.7175e-01

Tokens embeddings:
tensor([-1.0233e-02, -2.9776e-01, -1.0000e+09, -2.8116e-01, -4.7238e-01,
        -1.0000e+09, -9.0318e-02, -2.5809e-01, -1.0000e+09, -1.3248e-01,
        -3.6371e-01, -1.0000e+09, -2.5532e-01, -2.7229e-01, -1.0000e+09,
        -2.6416e-01, -4.7442e-01, -1.0000e+09, -2.8151e-01, -4.5808e-01,
        -1.0000e+09, -1.2304e-01, -4.3775e-01, -1.0000e+09, -2.5571e-01,
        -2.0016e-01, -1.0000e+09, -9.0318e-02, -5.0984e-01, -1.0000e+09,
        -3.1615e-01, -3.7307e-01, -1.0000e+09, -2.7131e-01, -4.9470e-01,
        -1.0000e+09, -1.5336e-01, -2.4753e-01, -1.0000e+09, -2.8299e-01,
        -5.2124e-01, -1.0000e+09, -1.6590e-01, -4.2241e-01, -1.0000e+09,
        -4.8486e-02, -2.2133e-01, -1.0000e+09, -2.9970e-01, -4.7241e-01,
        -1.0000e+09, -4.9477e-02, -3.7405e-01, -1.0000e+09, -3.3221e-01,
        -2.7229e-01, -1.0000e+09, -3.7490e-01, -4.1085e-01, -1.0000e+09,
        -2.8151e-01, -3.1405e-01, -1.0000e+09, -1.1230e-01, -1.5565e-01,
        -1.5565e-01, -2.7175e-01

Tokens embeddings:
tensor([-1.0233e-02, -2.9776e-01, -1.0000e+09, -2.8116e-01, -4.7238e-01,
        -1.0000e+09, -9.0318e-02, -2.5809e-01, -1.0000e+09, -1.3248e-01,
        -3.6371e-01, -1.0000e+09, -2.5532e-01, -2.7229e-01, -1.0000e+09,
        -2.6416e-01, -4.7442e-01, -1.0000e+09, -2.8151e-01, -4.5808e-01,
        -1.0000e+09, -1.2304e-01, -4.3775e-01, -1.0000e+09, -2.5571e-01,
        -2.0016e-01, -1.0000e+09, -9.0318e-02, -5.0984e-01, -1.0000e+09,
        -3.1615e-01, -3.7307e-01, -1.0000e+09, -2.7131e-01, -4.9470e-01,
        -1.0000e+09, -1.5336e-01, -2.4753e-01, -1.0000e+09, -2.8299e-01,
        -5.2124e-01, -1.0000e+09, -1.6590e-01, -4.2241e-01, -1.0000e+09,
        -4.8486e-02, -2.2133e-01, -1.0000e+09, -2.9970e-01, -4.7241e-01,
        -1.0000e+09, -4.9477e-02, -3.7405e-01, -1.0000e+09, -3.3221e-01,
        -2.7229e-01, -1.0000e+09, -3.7490e-01, -4.1085e-01, -1.0000e+09,
        -2.8151e-01, -3.1405e-01, -1.0000e+09, -1.1230e-01, -1.5565e-01,
        -1.5565e-01, -2.7175e-01

Tokens embeddings:
tensor([-1.0233e-02, -2.9776e-01, -1.0000e+09, -2.8116e-01, -4.7238e-01,
        -1.0000e+09, -9.0318e-02, -2.5809e-01, -1.0000e+09, -1.3248e-01,
        -3.6371e-01, -1.0000e+09, -2.5532e-01, -2.7229e-01, -1.0000e+09,
        -2.6416e-01, -4.7442e-01, -1.0000e+09, -2.8151e-01, -4.5808e-01,
        -1.0000e+09, -1.2304e-01, -4.3775e-01, -1.0000e+09, -2.5571e-01,
        -2.0016e-01, -1.0000e+09, -9.0318e-02, -5.0984e-01, -1.0000e+09,
        -3.1615e-01, -3.7307e-01, -1.0000e+09, -2.7131e-01, -4.9470e-01,
        -1.0000e+09, -1.5336e-01, -2.4753e-01, -1.0000e+09, -2.8299e-01,
        -5.2124e-01, -1.0000e+09, -1.6590e-01, -4.2241e-01, -1.0000e+09,
        -4.8486e-02, -2.2133e-01, -1.0000e+09, -2.9970e-01, -4.7241e-01,
        -1.0000e+09, -4.9477e-02, -3.7405e-01, -1.0000e+09, -3.3221e-01,
        -2.7229e-01, -1.0000e+09, -3.7490e-01, -4.1085e-01, -1.0000e+09,
        -2.8151e-01, -3.1405e-01, -1.0000e+09, -1.1230e-01, -1.5565e-01,
        -1.5565e-01, -2.7175e-01

Tokens embeddings:
tensor([-1.0233e-02, -2.9776e-01, -1.0000e+09, -2.8116e-01, -4.7238e-01,
        -1.0000e+09, -9.0318e-02, -2.5809e-01, -1.0000e+09, -1.3248e-01,
        -3.6371e-01, -1.0000e+09, -2.5532e-01, -2.7229e-01, -1.0000e+09,
        -2.6416e-01, -4.7442e-01, -1.0000e+09, -2.8151e-01, -4.5808e-01,
        -1.0000e+09, -1.2304e-01, -4.3775e-01, -1.0000e+09, -2.5571e-01,
        -2.0016e-01, -1.0000e+09, -9.0318e-02, -5.0984e-01, -1.0000e+09,
        -3.1615e-01, -3.7307e-01, -1.0000e+09, -2.7131e-01, -4.9470e-01,
        -1.0000e+09, -1.5336e-01, -2.4753e-01, -1.0000e+09, -2.8299e-01,
        -5.2124e-01, -1.0000e+09, -1.6590e-01, -4.2241e-01, -1.0000e+09,
        -4.8486e-02, -2.2133e-01, -1.0000e+09, -2.9970e-01, -4.7241e-01,
        -1.0000e+09, -4.9477e-02, -3.7405e-01, -1.0000e+09, -3.3221e-01,
        -2.7229e-01, -1.0000e+09, -3.7490e-01, -4.1085e-01, -1.0000e+09,
        -2.8151e-01, -3.1405e-01, -1.0000e+09, -1.1230e-01, -1.5565e-01,
        -1.5565e-01, -2.7175e-01

Tokens embeddings:
tensor([-1.0233e-02, -2.9776e-01, -1.0000e+09, -2.8116e-01, -4.7238e-01,
        -1.0000e+09, -9.0318e-02, -2.5809e-01, -1.0000e+09, -1.3248e-01,
        -3.6371e-01, -1.0000e+09, -2.5532e-01, -2.7229e-01, -1.0000e+09,
        -2.6416e-01, -4.7442e-01, -1.0000e+09, -2.8151e-01, -4.5808e-01,
        -1.0000e+09, -1.2304e-01, -4.3775e-01, -1.0000e+09, -2.5571e-01,
        -2.0016e-01, -1.0000e+09, -9.0318e-02, -5.0984e-01, -1.0000e+09,
        -3.1615e-01, -3.7307e-01, -1.0000e+09, -2.7131e-01, -4.9470e-01,
        -1.0000e+09, -1.5336e-01, -2.4753e-01, -1.0000e+09, -2.8299e-01,
        -5.2124e-01, -1.0000e+09, -1.6590e-01, -4.2241e-01, -1.0000e+09,
        -4.8486e-02, -2.2133e-01, -1.0000e+09, -2.9970e-01, -4.7241e-01,
        -1.0000e+09, -4.9477e-02, -3.7405e-01, -1.0000e+09, -3.3221e-01,
        -2.7229e-01, -1.0000e+09, -3.7490e-01, -4.1085e-01, -1.0000e+09,
        -2.8151e-01, -3.1405e-01, -1.0000e+09, -1.1230e-01, -1.5565e-01,
        -1.5565e-01, -2.7175e-01

Tokens embeddings:
tensor([-0.0102, -0.2830, -0.4333, -0.2869,  0.0131, -0.2673, -0.3123, -0.0903,
        -0.2742, -0.1325, -0.3522, -0.1042, -0.3322, -0.2428, -0.1557, -0.1759,
        -0.3126, -0.2689, -0.3339, -0.2558, -0.3339, -0.2776, -0.3163, -0.2811,
        -0.2560, -0.0903, -0.3557, -0.3000, -0.2830, -0.2788, -0.1738, -0.3139,
        -0.2572, -0.3209, -0.3210, -0.0903, -0.3192, -0.2862,  0.0155, -0.0102,
        -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102,
        -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102,
        -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102,
        -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102,
        -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102,
        -0.0102, -0.0102, -0.0102, -0.0102, -0.0102])
arg max of tensor([-0.0102, -0.2830, -0.4333, -0.2869,  0.0131, -0.2673, -0.3123, -0.0903,
        -0.2742, -0.1325, -0.3522, -

Tokens embeddings:
tensor([-0.0102, -0.2830, -0.4333, -0.2869,  0.0131, -0.2673, -0.3123, -0.0903,
        -0.2742, -0.1325, -0.3522, -0.1042, -0.3322, -0.2428, -0.1557, -0.1759,
        -0.3126, -0.2689, -0.3339, -0.2558, -0.3339, -0.2776, -0.3163, -0.2811,
        -0.2560, -0.0903, -0.3557, -0.3000, -0.2830, -0.2788, -0.1738, -0.3139,
        -0.2572, -0.3209, -0.3210, -0.0903, -0.3192, -0.2862,  0.0155, -0.0102,
        -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102,
        -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102,
        -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102,
        -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102,
        -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102,
        -0.0102, -0.0102, -0.0102, -0.0102, -0.0102])
arg max of tensor([-0.0102, -0.2830, -0.4333, -0.2869,  0.0131, -0.2673, -0.3123, -0.0903,
        -0.2742, -0.1325, -0.3522, -

Tokens embeddings:
tensor([-0.0102, -0.2830, -0.4333, -0.2869,  0.0131, -0.2673, -0.3123, -0.0903,
        -0.2742, -0.1325, -0.3522, -0.1042, -0.3322, -0.2428, -0.1557, -0.1759,
        -0.3126, -0.2689, -0.3339, -0.2558, -0.3339, -0.2776, -0.3163, -0.2811,
        -0.2560, -0.0903, -0.3557, -0.3000, -0.2830, -0.2788, -0.1738, -0.3139,
        -0.2572, -0.3209, -0.3210, -0.0903, -0.3192, -0.2862,  0.0155, -0.0102,
        -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102,
        -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102,
        -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102,
        -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102,
        -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102,
        -0.0102, -0.0102, -0.0102, -0.0102, -0.0102])
arg max of tensor([-0.0102, -0.2830, -0.4333, -0.2869,  0.0131, -0.2673, -0.3123, -0.0903,
        -0.2742, -0.1325, -0.3522, -

Tokens embeddings:
tensor([-0.0102, -0.2830, -0.4333, -0.2869,  0.0131, -0.2673, -0.3123, -0.0903,
        -0.2742, -0.1325, -0.3522, -0.1042, -0.3322, -0.2428, -0.1557, -0.1759,
        -0.3126, -0.2689, -0.3339, -0.2558, -0.3339, -0.2776, -0.3163, -0.2811,
        -0.2560, -0.0903, -0.3557, -0.3000, -0.2830, -0.2788, -0.1738, -0.3139,
        -0.2572, -0.3209, -0.3210, -0.0903, -0.3192, -0.2862,  0.0155, -0.0102,
        -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102,
        -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102,
        -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102,
        -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102,
        -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102,
        -0.0102, -0.0102, -0.0102, -0.0102, -0.0102])
arg max of tensor([-0.0102, -0.2830, -0.4333, -0.2869,  0.0131, -0.2673, -0.3123, -0.0903,
        -0.2742, -0.1325, -0.3522, -

Tokens embeddings:
tensor([-0.0102, -0.2830, -0.4333, -0.2869,  0.0131, -0.2673, -0.3123, -0.0903,
        -0.2742, -0.1325, -0.3522, -0.1042, -0.3322, -0.2428, -0.1557, -0.1759,
        -0.3126, -0.2689, -0.3339, -0.2558, -0.3339, -0.2776, -0.3163, -0.2811,
        -0.2560, -0.0903, -0.3557, -0.3000, -0.2830, -0.2788, -0.1738, -0.3139,
        -0.2572, -0.3209, -0.3210, -0.0903, -0.3192, -0.2862,  0.0155, -0.0102,
        -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102,
        -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102,
        -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102,
        -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102,
        -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102,
        -0.0102, -0.0102, -0.0102, -0.0102, -0.0102])
arg max of tensor([-0.0102, -0.2830, -0.4333, -0.2869,  0.0131, -0.2673, -0.3123, -0.0903,
        -0.2742, -0.1325, -0.3522, -

Tokens embeddings:
tensor([-0.0102, -0.2830, -0.4333, -0.2869,  0.0131, -0.2673, -0.3123, -0.0903,
        -0.2742, -0.1325, -0.3522, -0.1042, -0.3322, -0.2428, -0.1557, -0.1759,
        -0.3126, -0.2689, -0.3339, -0.2558, -0.3339, -0.2776, -0.3163, -0.2811,
        -0.2560, -0.0903, -0.3557, -0.3000, -0.2830, -0.2788, -0.1738, -0.3139,
        -0.2572, -0.3209, -0.3210, -0.0903, -0.3192, -0.2862,  0.0155, -0.0102,
        -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102,
        -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102,
        -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102,
        -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102,
        -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102,
        -0.0102, -0.0102, -0.0102, -0.0102, -0.0102])
arg max of tensor([-0.0102, -0.2830, -0.4333, -0.2869,  0.0131, -0.2673, -0.3123, -0.0903,
        -0.2742, -0.1325, -0.3522, -

Tokens embeddings:
tensor([-0.0102, -0.2830, -0.4333, -0.2869,  0.0131, -0.2673, -0.3123, -0.0903,
        -0.2742, -0.1325, -0.3522, -0.1042, -0.3322, -0.2428, -0.1557, -0.1759,
        -0.3126, -0.2689, -0.3339, -0.2558, -0.3339, -0.2776, -0.3163, -0.2811,
        -0.2560, -0.0903, -0.3557, -0.3000, -0.2830, -0.2788, -0.1738, -0.3139,
        -0.2572, -0.3209, -0.3210, -0.0903, -0.3192, -0.2862,  0.0155, -0.0102,
        -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102,
        -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102,
        -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102,
        -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102,
        -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102,
        -0.0102, -0.0102, -0.0102, -0.0102, -0.0102])
arg max of tensor([-0.0102, -0.2830, -0.4333, -0.2869,  0.0131, -0.2673, -0.3123, -0.0903,
        -0.2742, -0.1325, -0.3522, -

Tokens embeddings:
tensor([-0.0102, -0.2830, -0.4333, -0.2869,  0.0131, -0.2673, -0.3123, -0.0903,
        -0.2742, -0.1325, -0.3522, -0.1042, -0.3322, -0.2428, -0.1557, -0.1759,
        -0.3126, -0.2689, -0.3339, -0.2558, -0.3339, -0.2776, -0.3163, -0.2811,
        -0.2560, -0.0903, -0.3557, -0.3000, -0.2830, -0.2788, -0.1738, -0.3139,
        -0.2572, -0.3209, -0.3210, -0.0903, -0.3192, -0.2862,  0.0155, -0.0102,
        -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102,
        -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102,
        -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102,
        -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102,
        -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102,
        -0.0102, -0.0102, -0.0102, -0.0102, -0.0102])
arg max of tensor([-0.0102, -0.2830, -0.4333, -0.2869,  0.0131, -0.2673, -0.3123, -0.0903,
        -0.2742, -0.1325, -0.3522, -

Tokens embeddings:
tensor([-0.0102, -0.2830, -0.4333, -0.2869,  0.0131, -0.2673, -0.3123, -0.0903,
        -0.2742, -0.1325, -0.3522, -0.1042, -0.3322, -0.2428, -0.1557, -0.1759,
        -0.3126, -0.2689, -0.3339, -0.2558, -0.3339, -0.2776, -0.3163, -0.2811,
        -0.2560, -0.0903, -0.3557, -0.3000, -0.2830, -0.2788, -0.1738, -0.3139,
        -0.2572, -0.3209, -0.3210, -0.0903, -0.3192, -0.2862,  0.0155, -0.0102,
        -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102,
        -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102,
        -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102,
        -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102,
        -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102,
        -0.0102, -0.0102, -0.0102, -0.0102, -0.0102])
arg max of tensor([-0.0102, -0.2830, -0.4333, -0.2869,  0.0131, -0.2673, -0.3123, -0.0903,
        -0.2742, -0.1325, -0.3522, -

Tokens embeddings:
tensor([-0.0102, -0.2830, -0.4333, -0.2869,  0.0131, -0.2673, -0.3123, -0.0903,
        -0.2742, -0.1325, -0.3522, -0.1042, -0.3322, -0.2428, -0.1557, -0.1759,
        -0.3126, -0.2689, -0.3339, -0.2558, -0.3339, -0.2776, -0.3163, -0.2811,
        -0.2560, -0.0903, -0.3557, -0.3000, -0.2830, -0.2788, -0.1738, -0.3139,
        -0.2572, -0.3209, -0.3210, -0.0903, -0.3192, -0.2862,  0.0155, -0.0102,
        -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102,
        -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102,
        -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102,
        -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102,
        -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102,
        -0.0102, -0.0102, -0.0102, -0.0102, -0.0102])
arg max of tensor([-0.0102, -0.2830, -0.4333, -0.2869,  0.0131, -0.2673, -0.3123, -0.0903,
        -0.2742, -0.1325, -0.3522, -

Tokens embeddings:
tensor([-0.0102, -0.2830, -0.4333, -0.2869,  0.0131, -0.2673, -0.3123, -0.0903,
        -0.2742, -0.1325, -0.3522, -0.1042, -0.3322, -0.2428, -0.1557, -0.1759,
        -0.3126, -0.2689, -0.3339, -0.2558, -0.3339, -0.2776, -0.3163, -0.2811,
        -0.2560, -0.0903, -0.3557, -0.3000, -0.2830, -0.2788, -0.1738, -0.3139,
        -0.2572, -0.3209, -0.3210, -0.0903, -0.3192, -0.2862,  0.0155, -0.0102,
        -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102,
        -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102,
        -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102,
        -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102,
        -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102,
        -0.0102, -0.0102, -0.0102, -0.0102, -0.0102])
arg max of tensor([-0.0102, -0.2830, -0.4333, -0.2869,  0.0131, -0.2673, -0.3123, -0.0903,
        -0.2742, -0.1325, -0.3522, -

Tokens embeddings:
tensor([-0.0102, -0.2830, -0.4333, -0.2869,  0.0131, -0.2673, -0.3123, -0.0903,
        -0.2742, -0.1325, -0.3522, -0.1042, -0.3322, -0.2428, -0.1557, -0.1759,
        -0.3126, -0.2689, -0.3339, -0.2558, -0.3339, -0.2776, -0.3163, -0.2811,
        -0.2560, -0.0903, -0.3557, -0.3000, -0.2830, -0.2788, -0.1738, -0.3139,
        -0.2572, -0.3209, -0.3210, -0.0903, -0.3192, -0.2862,  0.0155, -0.0102,
        -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102,
        -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102,
        -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102,
        -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102,
        -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102,
        -0.0102, -0.0102, -0.0102, -0.0102, -0.0102])
arg max of tensor([-0.0102, -0.2830, -0.4333, -0.2869,  0.0131, -0.2673, -0.3123, -0.0903,
        -0.2742, -0.1325, -0.3522, -

Tokens embeddings:
tensor([-1.0233e-02, -3.7863e-01, -1.0000e+09, -3.2633e-01, -4.5808e-01,
        -1.0000e+09, -1.0590e-01, -1.7369e-01, -1.0000e+09, -9.0318e-02,
        -3.6494e-01, -1.0000e+09, -2.5939e-01, -4.5808e-01, -1.0000e+09,
        -3.2808e-01, -4.2241e-01, -1.0000e+09, -1.1230e-01, -1.8983e-01,
        -2.7645e-01, -3.2175e-01, -1.1484e-01, -2.4663e-01, -2.8431e-01,
        -2.0761e-01, -1.0000e+09, -2.7733e-01, -6.0716e-01, -1.0000e+09,
        -3.5333e-01, -4.1190e-01, -1.0000e+09, -2.2910e-01, -5.1037e-01,
        -1.0000e+09, -2.8108e-01, -2.8116e-01, -1.0000e+09, -1.0184e-02,
        -3.7285e-01, -1.0000e+09, -1.0184e-02, -3.7285e-01, -1.0000e+09,
        -1.0184e-02, -3.7285e-01, -1.0000e+09, -1.0184e-02, -3.7285e-01,
        -1.0000e+09, -1.0184e-02, -3.7285e-01, -1.0000e+09, -1.0184e-02,
        -3.7285e-01, -1.0000e+09, -1.0184e-02, -3.7285e-01, -1.0000e+09,
        -1.0184e-02, -1.0184e-02, -3.7285e-01, -1.0184e-02, -3.7285e-01,
        -1.0000e+09, -1.0184e-02

Tokens embeddings:
tensor([-1.0233e-02, -3.7863e-01, -1.0000e+09, -3.2633e-01, -4.5808e-01,
        -1.0000e+09, -1.0590e-01, -1.7369e-01, -1.0000e+09, -9.0318e-02,
        -3.6494e-01, -1.0000e+09, -2.5939e-01, -4.5808e-01, -1.0000e+09,
        -3.2808e-01, -4.2241e-01, -1.0000e+09, -1.1230e-01, -1.8983e-01,
        -2.7645e-01, -3.2175e-01, -1.1484e-01, -2.4663e-01, -2.8431e-01,
        -2.0761e-01, -1.0000e+09, -2.7733e-01, -6.0716e-01, -1.0000e+09,
        -3.5333e-01, -4.1190e-01, -1.0000e+09, -2.2910e-01, -5.1037e-01,
        -1.0000e+09, -2.8108e-01, -2.8116e-01, -1.0000e+09, -1.0184e-02,
        -3.7285e-01, -1.0000e+09, -1.0184e-02, -3.7285e-01, -1.0000e+09,
        -1.0184e-02, -3.7285e-01, -1.0000e+09, -1.0184e-02, -3.7285e-01,
        -1.0000e+09, -1.0184e-02, -3.7285e-01, -1.0000e+09, -1.0184e-02,
        -3.7285e-01, -1.0000e+09, -1.0184e-02, -3.7285e-01, -1.0000e+09,
        -1.0184e-02, -1.0184e-02, -3.7285e-01, -1.0184e-02, -3.7285e-01,
        -1.0000e+09, -1.0184e-02

Tokens embeddings:
tensor([-1.0233e-02, -3.7863e-01, -1.0000e+09, -3.2633e-01, -4.5808e-01,
        -1.0000e+09, -1.0590e-01, -1.7369e-01, -1.0000e+09, -9.0318e-02,
        -3.6494e-01, -1.0000e+09, -2.5939e-01, -4.5808e-01, -1.0000e+09,
        -3.2808e-01, -4.2241e-01, -1.0000e+09, -1.1230e-01, -1.8983e-01,
        -2.7645e-01, -3.2175e-01, -1.1484e-01, -2.4663e-01, -2.8431e-01,
        -2.0761e-01, -1.0000e+09, -2.7733e-01, -6.0716e-01, -1.0000e+09,
        -3.5333e-01, -4.1190e-01, -1.0000e+09, -2.2910e-01, -5.1037e-01,
        -1.0000e+09, -2.8108e-01, -2.8116e-01, -1.0000e+09, -1.0184e-02,
        -3.7285e-01, -1.0000e+09, -1.0184e-02, -3.7285e-01, -1.0000e+09,
        -1.0184e-02, -3.7285e-01, -1.0000e+09, -1.0184e-02, -3.7285e-01,
        -1.0000e+09, -1.0184e-02, -3.7285e-01, -1.0000e+09, -1.0184e-02,
        -3.7285e-01, -1.0000e+09, -1.0184e-02, -3.7285e-01, -1.0000e+09,
        -1.0184e-02, -1.0184e-02, -3.7285e-01, -1.0184e-02, -3.7285e-01,
        -1.0000e+09, -1.0184e-02

Tokens embeddings:
tensor([-1.0233e-02, -3.7863e-01, -1.0000e+09, -3.2633e-01, -4.5808e-01,
        -1.0000e+09, -1.0590e-01, -1.7369e-01, -1.0000e+09, -9.0318e-02,
        -3.6494e-01, -1.0000e+09, -2.5939e-01, -4.5808e-01, -1.0000e+09,
        -3.2808e-01, -4.2241e-01, -1.0000e+09, -1.1230e-01, -1.8983e-01,
        -2.7645e-01, -3.2175e-01, -1.1484e-01, -2.4663e-01, -2.8431e-01,
        -2.0761e-01, -1.0000e+09, -2.7733e-01, -6.0716e-01, -1.0000e+09,
        -3.5333e-01, -4.1190e-01, -1.0000e+09, -2.2910e-01, -5.1037e-01,
        -1.0000e+09, -2.8108e-01, -2.8116e-01, -1.0000e+09, -1.0184e-02,
        -3.7285e-01, -1.0000e+09, -1.0184e-02, -3.7285e-01, -1.0000e+09,
        -1.0184e-02, -3.7285e-01, -1.0000e+09, -1.0184e-02, -3.7285e-01,
        -1.0000e+09, -1.0184e-02, -3.7285e-01, -1.0000e+09, -1.0184e-02,
        -3.7285e-01, -1.0000e+09, -1.0184e-02, -3.7285e-01, -1.0000e+09,
        -1.0184e-02, -1.0184e-02, -3.7285e-01, -1.0184e-02, -3.7285e-01,
        -1.0000e+09, -1.0184e-02

Tokens embeddings:
tensor([-1.0233e-02, -3.7863e-01, -1.0000e+09, -3.2633e-01, -4.5808e-01,
        -1.0000e+09, -1.0590e-01, -1.7369e-01, -1.0000e+09, -9.0318e-02,
        -3.6494e-01, -1.0000e+09, -2.5939e-01, -4.5808e-01, -1.0000e+09,
        -3.2808e-01, -4.2241e-01, -1.0000e+09, -1.1230e-01, -1.8983e-01,
        -2.7645e-01, -3.2175e-01, -1.1484e-01, -2.4663e-01, -2.8431e-01,
        -2.0761e-01, -1.0000e+09, -2.7733e-01, -6.0716e-01, -1.0000e+09,
        -3.5333e-01, -4.1190e-01, -1.0000e+09, -2.2910e-01, -5.1037e-01,
        -1.0000e+09, -2.8108e-01, -2.8116e-01, -1.0000e+09, -1.0184e-02,
        -3.7285e-01, -1.0000e+09, -1.0184e-02, -3.7285e-01, -1.0000e+09,
        -1.0184e-02, -3.7285e-01, -1.0000e+09, -1.0184e-02, -3.7285e-01,
        -1.0000e+09, -1.0184e-02, -3.7285e-01, -1.0000e+09, -1.0184e-02,
        -3.7285e-01, -1.0000e+09, -1.0184e-02, -3.7285e-01, -1.0000e+09,
        -1.0184e-02, -1.0184e-02, -3.7285e-01, -1.0184e-02, -3.7285e-01,
        -1.0000e+09, -1.0184e-02

Tokens embeddings:
tensor([-1.0233e-02, -3.7863e-01, -1.0000e+09, -3.2633e-01, -4.5808e-01,
        -1.0000e+09, -1.0590e-01, -1.7369e-01, -1.0000e+09, -9.0318e-02,
        -3.6494e-01, -1.0000e+09, -2.5939e-01, -4.5808e-01, -1.0000e+09,
        -3.2808e-01, -4.2241e-01, -1.0000e+09, -1.1230e-01, -1.8983e-01,
        -2.7645e-01, -3.2175e-01, -1.1484e-01, -2.4663e-01, -2.8431e-01,
        -2.0761e-01, -1.0000e+09, -2.7733e-01, -6.0716e-01, -1.0000e+09,
        -3.5333e-01, -4.1190e-01, -1.0000e+09, -2.2910e-01, -5.1037e-01,
        -1.0000e+09, -2.8108e-01, -2.8116e-01, -1.0000e+09, -1.0184e-02,
        -3.7285e-01, -1.0000e+09, -1.0184e-02, -3.7285e-01, -1.0000e+09,
        -1.0184e-02, -3.7285e-01, -1.0000e+09, -1.0184e-02, -3.7285e-01,
        -1.0000e+09, -1.0184e-02, -3.7285e-01, -1.0000e+09, -1.0184e-02,
        -3.7285e-01, -1.0000e+09, -1.0184e-02, -3.7285e-01, -1.0000e+09,
        -1.0184e-02, -1.0184e-02, -3.7285e-01, -1.0184e-02, -3.7285e-01,
        -1.0000e+09, -1.0184e-02

Tokens embeddings:
tensor([-1.0233e-02, -3.7863e-01, -1.0000e+09, -3.2633e-01, -4.5808e-01,
        -1.0000e+09, -1.0590e-01, -1.7369e-01, -1.0000e+09, -9.0318e-02,
        -3.6494e-01, -1.0000e+09, -2.5939e-01, -4.5808e-01, -1.0000e+09,
        -3.2808e-01, -4.2241e-01, -1.0000e+09, -1.1230e-01, -1.8983e-01,
        -2.7645e-01, -3.2175e-01, -1.1484e-01, -2.4663e-01, -2.8431e-01,
        -2.0761e-01, -1.0000e+09, -2.7733e-01, -6.0716e-01, -1.0000e+09,
        -3.5333e-01, -4.1190e-01, -1.0000e+09, -2.2910e-01, -5.1037e-01,
        -1.0000e+09, -2.8108e-01, -2.8116e-01, -1.0000e+09, -1.0184e-02,
        -3.7285e-01, -1.0000e+09, -1.0184e-02, -3.7285e-01, -1.0000e+09,
        -1.0184e-02, -3.7285e-01, -1.0000e+09, -1.0184e-02, -3.7285e-01,
        -1.0000e+09, -1.0184e-02, -3.7285e-01, -1.0000e+09, -1.0184e-02,
        -3.7285e-01, -1.0000e+09, -1.0184e-02, -3.7285e-01, -1.0000e+09,
        -1.0184e-02, -1.0184e-02, -3.7285e-01, -1.0184e-02, -3.7285e-01,
        -1.0000e+09, -1.0184e-02

Tokens embeddings:
tensor([-1.0233e-02, -3.7863e-01, -1.0000e+09, -3.2633e-01, -4.5808e-01,
        -1.0000e+09, -1.0590e-01, -1.7369e-01, -1.0000e+09, -9.0318e-02,
        -3.6494e-01, -1.0000e+09, -2.5939e-01, -4.5808e-01, -1.0000e+09,
        -3.2808e-01, -4.2241e-01, -1.0000e+09, -1.1230e-01, -1.8983e-01,
        -2.7645e-01, -3.2175e-01, -1.1484e-01, -2.4663e-01, -2.8431e-01,
        -2.0761e-01, -1.0000e+09, -2.7733e-01, -6.0716e-01, -1.0000e+09,
        -3.5333e-01, -4.1190e-01, -1.0000e+09, -2.2910e-01, -5.1037e-01,
        -1.0000e+09, -2.8108e-01, -2.8116e-01, -1.0000e+09, -1.0184e-02,
        -3.7285e-01, -1.0000e+09, -1.0184e-02, -3.7285e-01, -1.0000e+09,
        -1.0184e-02, -3.7285e-01, -1.0000e+09, -1.0184e-02, -3.7285e-01,
        -1.0000e+09, -1.0184e-02, -3.7285e-01, -1.0000e+09, -1.0184e-02,
        -3.7285e-01, -1.0000e+09, -1.0184e-02, -3.7285e-01, -1.0000e+09,
        -1.0184e-02, -1.0184e-02, -3.7285e-01, -1.0184e-02, -3.7285e-01,
        -1.0000e+09, -1.0184e-02

Tokens embeddings:
tensor([-1.0233e-02, -3.7863e-01, -1.0000e+09, -3.2633e-01, -4.5808e-01,
        -1.0000e+09, -1.0590e-01, -1.7369e-01, -1.0000e+09, -9.0318e-02,
        -3.6494e-01, -1.0000e+09, -2.5939e-01, -4.5808e-01, -1.0000e+09,
        -3.2808e-01, -4.2241e-01, -1.0000e+09, -1.1230e-01, -1.8983e-01,
        -2.7645e-01, -3.2175e-01, -1.1484e-01, -2.4663e-01, -2.8431e-01,
        -2.0761e-01, -1.0000e+09, -2.7733e-01, -6.0716e-01, -1.0000e+09,
        -3.5333e-01, -4.1190e-01, -1.0000e+09, -2.2910e-01, -5.1037e-01,
        -1.0000e+09, -2.8108e-01, -2.8116e-01, -1.0000e+09, -1.0184e-02,
        -3.7285e-01, -1.0000e+09, -1.0184e-02, -3.7285e-01, -1.0000e+09,
        -1.0184e-02, -3.7285e-01, -1.0000e+09, -1.0184e-02, -3.7285e-01,
        -1.0000e+09, -1.0184e-02, -3.7285e-01, -1.0000e+09, -1.0184e-02,
        -3.7285e-01, -1.0000e+09, -1.0184e-02, -3.7285e-01, -1.0000e+09,
        -1.0184e-02, -1.0184e-02, -3.7285e-01, -1.0184e-02, -3.7285e-01,
        -1.0000e+09, -1.0184e-02

Tokens embeddings:
tensor([-1.0233e-02, -3.7863e-01, -1.0000e+09, -3.2633e-01, -4.5808e-01,
        -1.0000e+09, -1.0590e-01, -1.7369e-01, -1.0000e+09, -9.0318e-02,
        -3.6494e-01, -1.0000e+09, -2.5939e-01, -4.5808e-01, -1.0000e+09,
        -3.2808e-01, -4.2241e-01, -1.0000e+09, -1.1230e-01, -1.8983e-01,
        -2.7645e-01, -3.2175e-01, -1.1484e-01, -2.4663e-01, -2.8431e-01,
        -2.0761e-01, -1.0000e+09, -2.7733e-01, -6.0716e-01, -1.0000e+09,
        -3.5333e-01, -4.1190e-01, -1.0000e+09, -2.2910e-01, -5.1037e-01,
        -1.0000e+09, -2.8108e-01, -2.8116e-01, -1.0000e+09, -1.0184e-02,
        -3.7285e-01, -1.0000e+09, -1.0184e-02, -3.7285e-01, -1.0000e+09,
        -1.0184e-02, -3.7285e-01, -1.0000e+09, -1.0184e-02, -3.7285e-01,
        -1.0000e+09, -1.0184e-02, -3.7285e-01, -1.0000e+09, -1.0184e-02,
        -3.7285e-01, -1.0000e+09, -1.0184e-02, -3.7285e-01, -1.0000e+09,
        -1.0184e-02, -1.0184e-02, -3.7285e-01, -1.0184e-02, -3.7285e-01,
        -1.0000e+09, -1.0184e-02

Tokens embeddings:
tensor([-1.0233e-02, -3.7863e-01, -1.0000e+09, -3.2633e-01, -4.5808e-01,
        -1.0000e+09, -1.0590e-01, -1.7369e-01, -1.0000e+09, -9.0318e-02,
        -3.6494e-01, -1.0000e+09, -2.5939e-01, -4.5808e-01, -1.0000e+09,
        -3.2808e-01, -4.2241e-01, -1.0000e+09, -1.1230e-01, -1.8983e-01,
        -2.7645e-01, -3.2175e-01, -1.1484e-01, -2.4663e-01, -2.8431e-01,
        -2.0761e-01, -1.0000e+09, -2.7733e-01, -6.0716e-01, -1.0000e+09,
        -3.5333e-01, -4.1190e-01, -1.0000e+09, -2.2910e-01, -5.1037e-01,
        -1.0000e+09, -2.8108e-01, -2.8116e-01, -1.0000e+09, -1.0184e-02,
        -3.7285e-01, -1.0000e+09, -1.0184e-02, -3.7285e-01, -1.0000e+09,
        -1.0184e-02, -3.7285e-01, -1.0000e+09, -1.0184e-02, -3.7285e-01,
        -1.0000e+09, -1.0184e-02, -3.7285e-01, -1.0000e+09, -1.0184e-02,
        -3.7285e-01, -1.0000e+09, -1.0184e-02, -3.7285e-01, -1.0000e+09,
        -1.0184e-02, -1.0184e-02, -3.7285e-01, -1.0184e-02, -3.7285e-01,
        -1.0000e+09, -1.0184e-02

Tokens embeddings:
tensor([-1.0233e-02, -3.7863e-01, -1.0000e+09, -3.2633e-01, -4.5808e-01,
        -1.0000e+09, -1.0590e-01, -1.7369e-01, -1.0000e+09, -9.0318e-02,
        -3.6494e-01, -1.0000e+09, -2.5939e-01, -4.5808e-01, -1.0000e+09,
        -3.2808e-01, -4.2241e-01, -1.0000e+09, -1.1230e-01, -1.8983e-01,
        -2.7645e-01, -3.2175e-01, -1.1484e-01, -2.4663e-01, -2.8431e-01,
        -2.0761e-01, -1.0000e+09, -2.7733e-01, -6.0716e-01, -1.0000e+09,
        -3.5333e-01, -4.1190e-01, -1.0000e+09, -2.2910e-01, -5.1037e-01,
        -1.0000e+09, -2.8108e-01, -2.8116e-01, -1.0000e+09, -1.0184e-02,
        -3.7285e-01, -1.0000e+09, -1.0184e-02, -3.7285e-01, -1.0000e+09,
        -1.0184e-02, -3.7285e-01, -1.0000e+09, -1.0184e-02, -3.7285e-01,
        -1.0000e+09, -1.0184e-02, -3.7285e-01, -1.0000e+09, -1.0184e-02,
        -3.7285e-01, -1.0000e+09, -1.0184e-02, -3.7285e-01, -1.0000e+09,
        -1.0184e-02, -1.0184e-02, -3.7285e-01, -1.0184e-02, -3.7285e-01,
        -1.0000e+09, -1.0184e-02

Tokens embeddings:
tensor([-0.0102, -0.2124, -0.1665, -0.2569, -0.3522, -0.2519, -0.2705, -0.2830,
        -0.2825, -0.3074, -0.1557, -0.1557, -0.2735, -0.2193, -0.3281, -0.3628,
        -0.3012, -0.3432, -0.2139, -0.3356, -0.2955,  0.0443, -0.1557,  0.0155,
        -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102,
        -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102,
        -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102,
        -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102,
        -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102,
        -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102,
        -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102,
        -0.0102, -0.0102, -0.0102, -0.0102, -0.0102])
arg max of tensor([-0.0102, -0.2124, -0.1665, -0.2569, -0.3522, -0.2519, -0.2705, -0.2830,
        -0.2825, -0.3074, -0.1557, -

Tokens embeddings:
tensor([-0.0102, -0.2124, -0.1665, -0.2569, -0.3522, -0.2519, -0.2705, -0.2830,
        -0.2825, -0.3074, -0.1557, -0.1557, -0.2735, -0.2193, -0.3281, -0.3628,
        -0.3012, -0.3432, -0.2139, -0.3356, -0.2955,  0.0443, -0.1557,  0.0155,
        -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102,
        -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102,
        -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102,
        -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102,
        -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102,
        -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102,
        -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102,
        -0.0102, -0.0102, -0.0102, -0.0102, -0.0102])
arg max of tensor([-0.0102, -0.2124, -0.1665, -0.2569, -0.3522, -0.2519, -0.2705, -0.2830,
        -0.2825, -0.3074, -0.1557, -

Tokens embeddings:
tensor([-0.0102, -0.2124, -0.1665, -0.2569, -0.3522, -0.2519, -0.2705, -0.2830,
        -0.2825, -0.3074, -0.1557, -0.1557, -0.2735, -0.2193, -0.3281, -0.3628,
        -0.3012, -0.3432, -0.2139, -0.3356, -0.2955,  0.0443, -0.1557,  0.0155,
        -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102,
        -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102,
        -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102,
        -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102,
        -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102,
        -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102,
        -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102,
        -0.0102, -0.0102, -0.0102, -0.0102, -0.0102])
arg max of tensor([-0.0102, -0.2124, -0.1665, -0.2569, -0.3522, -0.2519, -0.2705, -0.2830,
        -0.2825, -0.3074, -0.1557, -

Tokens embeddings:
tensor([-0.0102, -0.2124, -0.1665, -0.2569, -0.3522, -0.2519, -0.2705, -0.2830,
        -0.2825, -0.3074, -0.1557, -0.1557, -0.2735, -0.2193, -0.3281, -0.3628,
        -0.3012, -0.3432, -0.2139, -0.3356, -0.2955,  0.0443, -0.1557,  0.0155,
        -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102,
        -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102,
        -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102,
        -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102,
        -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102,
        -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102,
        -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102,
        -0.0102, -0.0102, -0.0102, -0.0102, -0.0102])
arg max of tensor([-0.0102, -0.2124, -0.1665, -0.2569, -0.3522, -0.2519, -0.2705, -0.2830,
        -0.2825, -0.3074, -0.1557, -

Tokens embeddings:
tensor([-0.0102, -0.2124, -0.1665, -0.2569, -0.3522, -0.2519, -0.2705, -0.2830,
        -0.2825, -0.3074, -0.1557, -0.1557, -0.2735, -0.2193, -0.3281, -0.3628,
        -0.3012, -0.3432, -0.2139, -0.3356, -0.2955,  0.0443, -0.1557,  0.0155,
        -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102,
        -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102,
        -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102,
        -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102,
        -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102,
        -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102,
        -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102,
        -0.0102, -0.0102, -0.0102, -0.0102, -0.0102])
arg max of tensor([-0.0102, -0.2124, -0.1665, -0.2569, -0.3522, -0.2519, -0.2705, -0.2830,
        -0.2825, -0.3074, -0.1557, -

Tokens embeddings:
tensor([-0.0102, -0.2124, -0.1665, -0.2569, -0.3522, -0.2519, -0.2705, -0.2830,
        -0.2825, -0.3074, -0.1557, -0.1557, -0.2735, -0.2193, -0.3281, -0.3628,
        -0.3012, -0.3432, -0.2139, -0.3356, -0.2955,  0.0443, -0.1557,  0.0155,
        -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102,
        -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102,
        -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102,
        -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102,
        -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102,
        -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102,
        -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102,
        -0.0102, -0.0102, -0.0102, -0.0102, -0.0102])
arg max of tensor([-0.0102, -0.2124, -0.1665, -0.2569, -0.3522, -0.2519, -0.2705, -0.2830,
        -0.2825, -0.3074, -0.1557, -

Tokens embeddings:
tensor([-0.0102, -0.2124, -0.1665, -0.2569, -0.3522, -0.2519, -0.2705, -0.2830,
        -0.2825, -0.3074, -0.1557, -0.1557, -0.2735, -0.2193, -0.3281, -0.3628,
        -0.3012, -0.3432, -0.2139, -0.3356, -0.2955,  0.0443, -0.1557,  0.0155,
        -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102,
        -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102,
        -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102,
        -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102,
        -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102,
        -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102,
        -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102,
        -0.0102, -0.0102, -0.0102, -0.0102, -0.0102])
arg max of tensor([-0.0102, -0.2124, -0.1665, -0.2569, -0.3522, -0.2519, -0.2705, -0.2830,
        -0.2825, -0.3074, -0.1557, -

Tokens embeddings:
tensor([-0.0102, -0.2124, -0.1665, -0.2569, -0.3522, -0.2519, -0.2705, -0.2830,
        -0.2825, -0.3074, -0.1557, -0.1557, -0.2735, -0.2193, -0.3281, -0.3628,
        -0.3012, -0.3432, -0.2139, -0.3356, -0.2955,  0.0443, -0.1557,  0.0155,
        -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102,
        -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102,
        -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102,
        -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102,
        -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102,
        -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102,
        -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102,
        -0.0102, -0.0102, -0.0102, -0.0102, -0.0102])
arg max of tensor([-0.0102, -0.2124, -0.1665, -0.2569, -0.3522, -0.2519, -0.2705, -0.2830,
        -0.2825, -0.3074, -0.1557, -

Tokens embeddings:
tensor([-0.0102, -0.2124, -0.1665, -0.2569, -0.3522, -0.2519, -0.2705, -0.2830,
        -0.2825, -0.3074, -0.1557, -0.1557, -0.2735, -0.2193, -0.3281, -0.3628,
        -0.3012, -0.3432, -0.2139, -0.3356, -0.2955,  0.0443, -0.1557,  0.0155,
        -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102,
        -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102,
        -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102,
        -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102,
        -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102,
        -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102,
        -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102,
        -0.0102, -0.0102, -0.0102, -0.0102, -0.0102])
arg max of tensor([-0.0102, -0.2124, -0.1665, -0.2569, -0.3522, -0.2519, -0.2705, -0.2830,
        -0.2825, -0.3074, -0.1557, -

Tokens embeddings:
tensor([-0.0102, -0.2124, -0.1665, -0.2569, -0.3522, -0.2519, -0.2705, -0.2830,
        -0.2825, -0.3074, -0.1557, -0.1557, -0.2735, -0.2193, -0.3281, -0.3628,
        -0.3012, -0.3432, -0.2139, -0.3356, -0.2955,  0.0443, -0.1557,  0.0155,
        -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102,
        -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102,
        -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102,
        -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102,
        -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102,
        -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102,
        -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102,
        -0.0102, -0.0102, -0.0102, -0.0102, -0.0102])
arg max of tensor([-0.0102, -0.2124, -0.1665, -0.2569, -0.3522, -0.2519, -0.2705, -0.2830,
        -0.2825, -0.3074, -0.1557, -

Tokens embeddings:
tensor([-0.0102, -0.2124, -0.1665, -0.2569, -0.3522, -0.2519, -0.2705, -0.2830,
        -0.2825, -0.3074, -0.1557, -0.1557, -0.2735, -0.2193, -0.3281, -0.3628,
        -0.3012, -0.3432, -0.2139, -0.3356, -0.2955,  0.0443, -0.1557,  0.0155,
        -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102,
        -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102,
        -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102,
        -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102,
        -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102,
        -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102,
        -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102,
        -0.0102, -0.0102, -0.0102, -0.0102, -0.0102])
arg max of tensor([-0.0102, -0.2124, -0.1665, -0.2569, -0.3522, -0.2519, -0.2705, -0.2830,
        -0.2825, -0.3074, -0.1557, -

Tokens embeddings:
tensor([-0.0102, -0.2124, -0.1665, -0.2569, -0.3522, -0.2519, -0.2705, -0.2830,
        -0.2825, -0.3074, -0.1557, -0.1557, -0.2735, -0.2193, -0.3281, -0.3628,
        -0.3012, -0.3432, -0.2139, -0.3356, -0.2955,  0.0443, -0.1557,  0.0155,
        -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102,
        -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102,
        -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102,
        -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102,
        -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102,
        -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102,
        -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102, -0.0102,
        -0.0102, -0.0102, -0.0102, -0.0102, -0.0102])
arg max of tensor([-0.0102, -0.2124, -0.1665, -0.2569, -0.3522, -0.2519, -0.2705, -0.2830,
        -0.2825, -0.3074, -0.1557, -

In [None]:
# Pooling First token [CLS] for each sentence - argmax

# Select some sentences randomly
sent_index = [21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51,52,53,54,55,56,57,58,59,60,61,62,63,64]

for s in sent_index:
  print("*" * 100)
  # Get the sentence's words
  tokens = test_inputs[s]
  # For each layer...
  for l in range(12):
    print("\nLayer", l+1)
    attention = np.squeeze(test_attentions[s][l].detach().cpu().numpy(), axis=0)
    # and for each head
    #for h, head in enumerate(attention):
    #print("Head", h+1)
    # Get the attention for the cls token
    encoded_tokens = bert_tokenizer(tokens, padding=True, truncation=True, max_length=128, return_tensors='pt')
    encoded_tokens = encoded_tokens.to(device)
    with torch.no_grad():
      model_output1 = model_e(**encoded_tokens)
      tokens_embeddings = model_output1[0][:,0]
      tokens_embeddings = tokens_embeddings.cpu()
      cls_attentions = head[0]
      display(HTML(colorize(tokens, cls_attentions)))
      print("Tokens embeddings:")
      print(tokens_embeddings)
      arg = argmax(tokens_embeddings)
      print('arg max of %s: %d' % (tokens_embeddings, arg))

Output hidden; open in https://colab.research.google.com to view.

In [None]:
# Max Pooling for all tokens in sentences and argmax

# Select some sentences randomly
sent_index = [21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51,52,53,54,55,56,57,58,59,60,61,62,63,64]

for s in sent_index:
  print("*" * 100)
  # Get the sentence's words
  tokens = test_inputs[s]
  # For each layer...
  for l in range(12):
    print("\nLayer", l+1)
    attention = np.squeeze(test_attentions[s][l].detach().cpu().numpy(), axis=0)
    # and for each head
    #print("Head", h+1)
    # Get the attention for the cls token
    encoded_tokens = bert_tokenizer(tokens, truncation=True, padding=True, max_length=128, return_tensors='pt')
    encoded_tokens = encoded_tokens.to(device)
    with torch.no_grad():
      model_output1 = model_e(**encoded_tokens)
      tokens_embeddings = max_pooling(model_output1, encoded_tokens['attention_mask'])
      tokens_embeddings = tokens_embeddings.cpu()
      cls_attentions = head[0]
      display(HTML(colorize(tokens, cls_attentions)))
      print("Tokens embeddings:")
      print(tokens_embeddings)
      arg = argmax(tokens_embeddings)
      print('arg max of %s: %d' % (tokens_embeddings, arg))