In [1]:
from google.colab import drive
drive.mount('/content/drive/')

Mounted at /content/drive/


In [2]:
!pip install sentencepiece

Collecting sentencepiece
[?25l  Downloading https://files.pythonhosted.org/packages/ac/aa/1437691b0c7c83086ebb79ce2da16e00bef024f24fec2a5161c35476f499/sentencepiece-0.1.96-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (1.2MB)
[K     |▎                               | 10kB 18.2MB/s eta 0:00:01[K     |▌                               | 20kB 22.0MB/s eta 0:00:01[K     |▉                               | 30kB 19.1MB/s eta 0:00:01[K     |█                               | 40kB 15.6MB/s eta 0:00:01[K     |█▍                              | 51kB 8.5MB/s eta 0:00:01[K     |█▋                              | 61kB 9.1MB/s eta 0:00:01[K     |██                              | 71kB 9.3MB/s eta 0:00:01[K     |██▏                             | 81kB 8.5MB/s eta 0:00:01[K     |██▍                             | 92kB 9.1MB/s eta 0:00:01[K     |██▊                             | 102kB 8.1MB/s eta 0:00:01[K     |███                             | 112kB 8.1MB/s eta 0:00:01[K   

In [3]:
%%capture
!pip install transformers

In [4]:
import torch
import numpy as np
import pandas as pd
import torch.nn as nn

import transformers
from sklearn.metrics import *
from transformers import AdamW
from tqdm.notebook import tqdm
from scipy.special import softmax
from sklearn.metrics import classification_report
from sklearn.model_selection import train_test_split as tts
from transformers import BertTokenizerFast, BertConfig, BertForSequenceClassification, AutoModel
from torch.utils.data import TensorDataset, DataLoader, RandomSampler, SequentialSampler

In [5]:
# Define the device
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')

#### Download

In [6]:
cd drive/My Drive/Colab Notebooks/experiments

/content/drive/My Drive/Colab Notebooks/experiments


In [7]:
# Download humor detection data
# Paper: https://arxiv.org/abs/2004.12765
data = pd.read_csv("data/trofix.csv")
print("\nThere are", len(data), "sentences")

# Use the standard text/label columns
# Create labels: 1 --> humorous, 0 --> not humorous
#data = data.dropna()
data["label"] = data["label"].apply(int)
data.head()


There are 1444 sentences


Unnamed: 0,arg1,arg2,verb,sentence,verb_stem,label
0,mileage,struck,blow,Triple mileage has struck another blow to the ...,strike,1
1,terrorist,attack,target,U.S. officials said evidence suggests that a J...,attack,0
2,forces,stepped,use,"Some police forces , for example , have steppe...",step,0
3,day,pour,stream,"Every day his troops gather under the green , ...",pour,0
4,manufacturers,rolling,products,He says manufacturers are increasingly rolling...,roll,1


#### Split to training, validation and test



In [8]:
# Use a subset for quick experiments
#data = data[:10000]

# Split to train, val and test
train, test = tts(data[["sentence", "label"]], random_state=42, test_size=0.1)
train, val = tts(train, random_state=42, test_size=test.shape[0])

#### Tokenize and encode with BERT tokenizer

In [9]:
from transformers import XLMRobertaForSequenceClassification

output_dir = 'stockholm/xlm_code/trofix_xlmroberta/xlm-roberta_model_save'

print(output_dir)

stockholm/xlm_code/trofix_xlmroberta/xlm-roberta_model_save


In [10]:
from transformers import XLMRobertaTokenizer
import torch
# Load the BERT tokenizer.
print('Loading XLMRobertaTokenizer...')
bert_tokenizer = XLMRobertaTokenizer.from_pretrained(output_dir)
model_e = XLMRobertaForSequenceClassification.from_pretrained(output_dir, num_labels = 2, output_attentions = True, output_hidden_states = True,)

Loading XLMRobertaTokenizer...


Some weights of the model checkpoint at stockholm/xlm_code/trofix_xlmroberta/xlm-roberta_model_save were not used when initializing XLMRobertaForSequenceClassification: ['roberta.pooler.dense.weight', 'roberta.pooler.dense.bias']
- This IS expected if you are initializing XLMRobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing XLMRobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


In [11]:
MAX_LEN = 21 # This value could be set as 256, 512 etc.

encoded_instance = bert_tokenizer.encode_plus(
            train.iloc[0].sentence,
            truncation = True,                
            add_special_tokens = True,
            max_length = MAX_LEN,     
            pad_to_max_length = True,
            return_attention_mask = True,  
            return_tensors = 'pt' # return pytorch tensors
       )


encoded_instance



{'input_ids': tensor([[     0,   9563,    237,  41325,    136,  10142,  17957,   3444,     47,
          59102,     10,  63847,    111,  43851,  36867,    242,      7, 181991,
             53,    390,      2]]), 'attention_mask': tensor([[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]])}

In [12]:
print("Original text:", train.iloc[0].sentence)
print("BERT BPEs:", bert_tokenizer.convert_ids_to_tokens(encoded_instance["input_ids"][0]))

Original text: Just as Reuters and Telerate want to grab a piece of Quotron 's territory by offering U.S. equities quotes , Quotron is eyeing their turf and mulling ways to offer pricing information on fixed-income instruments and foreign currencies .
BERT BPEs: ['<s>', '▁Just', '▁as', '▁Reuters', '▁and', '▁Tele', 'rate', '▁want', '▁to', '▁grab', '▁a', '▁piece', '▁of', '▁Quo', 'tron', "▁'", 's', '▁territor', 'y', '▁by', '</s>']


In [13]:
# Set max_len to the maximum length of the training data 
max_len = max([len(bert_tokenizer.encode(s)) for s in train.sentence.to_list()])
print("The maximum sentence length in training based on BERT BPEs is", max_len)

The maximum sentence length in training based on BERT BPEs is 121


In [14]:
# Tokenize and encode sentences in each set
x_train = bert_tokenizer.batch_encode_plus(
    train.sentence.tolist(),
    max_length = max_len,
    padding=True,
    truncation=True
)
x_val = bert_tokenizer.batch_encode_plus(
    val.sentence.tolist(),
    max_length = max_len,
    padding=True,
    truncation=True
)
x_test = bert_tokenizer.batch_encode_plus(
    test.sentence.tolist(),
    max_length = max_len,
    padding=True,
    truncation=True
)

In [15]:
# Convert lists to tensors in order to feed them to our PyTorch model
train_seq = torch.tensor(x_train['input_ids'])
train_mask = torch.tensor(x_train['attention_mask'])
train_y = torch.tensor(train.label.tolist())

val_seq = torch.tensor(x_val['input_ids'])
val_mask = torch.tensor(x_val['attention_mask'])
val_y = torch.tensor(val.label.tolist())

test_seq = torch.tensor(x_test['input_ids'])
test_mask = torch.tensor(x_test['attention_mask'])
test_y = torch.tensor(test.label.tolist())

In [16]:
batch_size = 32

# Create a dataloader for each set

# TensorDataset: Creates a PyTorch dataset object to load data from
train_data = TensorDataset(train_seq, train_mask, train_y)
# RandomSampler: specify the sequence of indices/keys used in data loading
train_sampler = RandomSampler(train_data)
# DataLoader: a Python iterable over a dataset
train_dataloader = DataLoader(train_data, sampler=train_sampler, batch_size=batch_size)

val_data = TensorDataset(val_seq, val_mask, val_y)
val_sampler = SequentialSampler(val_data)
val_dataloader = DataLoader(val_data, sampler=val_sampler, batch_size=batch_size)

test_data = TensorDataset(test_seq, test_mask, test_y)
test_sampler = SequentialSampler(test_data)
test_dataloader = DataLoader(test_data, sampler=test_sampler, batch_size=1)

## Inference

#### Load the saved checkpoint

In [17]:
model_e = model_e.to(device)

#### Get predictions for test

In [18]:
# Predict for the test set and save the results
model_e.eval()
test_predictions = []
test_targets = []
test_attentions = []
test_inputs = []

for batch in test_dataloader:
  batch = [t.to(device) for t in batch]
  sent_id, mask, labels = batch
  # Get gold labels
  test_targets.extend(labels.detach().cpu().numpy())
  # Get input words
  test_inputs.append(bert_tokenizer.convert_ids_to_tokens(sent_id.detach().cpu().numpy()[0]))
  with torch.no_grad():
    # Get predictions
    outputs = model_e(sent_id, attention_mask=mask)
    # Apply softmax to the outputs
    output_probs = softmax(outputs.logits.detach().cpu().numpy(), axis=1)
    # Get the with the highest probability as the predicted label
    test_predictions.extend(np.argmax(output_probs, axis=1))
    # Get attention weights
    # Attention weights from all layers are returned in a tuple
    # The weights from each layer are in a tensor with shape (batch_size, attention_heads, max_len, max_len)
    test_attentions.append(outputs.attentions)

#### Evaluate

In [19]:
print("F1:", f1_score(test_targets, test_predictions, average="binary"))
print("ACC:", accuracy_score(test_targets, test_predictions))
print("AUPR:", average_precision_score(test_targets, test_predictions))
print("PRECISION:", precision_score(test_targets, test_predictions))
print("RECALL:", recall_score(test_targets, test_predictions))
print("AUC:", roc_auc_score(test_targets, test_predictions))

F1: 0.9312977099236641
ACC: 0.9379310344827586
AUPR: 0.894952174262519
PRECISION: 0.9242424242424242
RECALL: 0.9384615384615385
AUC: 0.9379807692307692


## Attention analysis


In [20]:
# Get attention heatmaps
import matplotlib
from IPython.core.display import display, HTML
def colorize(words, color_array):
    cmap=matplotlib.cm.Reds
    template = '<span class="barcode"; style="color: black; background-color: {}">{}</span>'
    colored_string = ''
    for word, color in zip(words, color_array):
        color = matplotlib.colors.rgb2hex(cmap(color)[:3])
        colored_string += template.format(color, '&nbsp' + word + '&nbsp')
    return colored_string

In [21]:
#max pooling to generate a fixed sized sentence embedding


#Max Pooling - Take the max value over time for every dimension
def max_pooling(model_output, attention_mask):
    token_embeddings = model_output[0] #First element of model_output contains all token embeddings
    input_mask_expanded = attention_mask.resize_(token_embeddings.size())
    #input_mask_expanded = attention_mask.unsqueeze(-1).expand(token_embeddings.size()).float()
    token_embeddings[input_mask_expanded == 0] = -1e9  # Set padding tokens to large negative value
    max_over_time = torch.max(token_embeddings, 1)[0]
    return max_over_time

def avg_pooling(model_output, attention_mask):
    token_embeddings = model_output[0] #First element of model_output contains all token embeddings
    input_mask_expanded = attention_mask.resize_(token_embeddings.size())
    #input_mask_expanded = attention_mask.unsqueeze(-1).expand(token_embeddings.size()).float()
    token_embeddings[input_mask_expanded == 0] = -1e9  # Set padding tokens to large negative value
    avg_over_time = torch.mean(token_embeddings, 1)[0]
    return avg_over_time


#Sentences we want sentence embeddings for
sentences = ['The', 'stars', 'gravitate', 'towards', 'each', 'other.']

#Tokenize sentences
encoded_input = bert_tokenizer(sentences, padding=True, truncation=True, max_length=128, return_tensors='pt')
encoded_input = encoded_input.to(device)

#Compute token embeddings
with torch.no_grad():
    model_output = model_e(**encoded_input)

#Perform pooling. In this case, max pooling
sentence_embeddings = max_pooling(model_output, encoded_input['attention_mask'])
#avg_sentence_embeddings = avg_pooling(model_output, encoded_input['attention_mask'])


print("Sentence embeddings:")
print(sentence_embeddings)

Sentence embeddings:
tensor([0.0774, 0.0319, 0.2602, 0.0943, 0.4257, 0.9204], device='cuda:0')


In [22]:
# numpy implementation of argmax
from numpy import argmax

sentence_embeddings = sentence_embeddings.cpu()

# get argmax
result = argmax(sentence_embeddings)
print('arg max of %s: %d' % (sentence_embeddings, result))

arg max of tensor([0.0774, 0.0319, 0.2602, 0.0943, 0.4257, 0.9204]): 5


In [23]:
#CLS token of each input represents the sentence embedding


#Sentences we want sentence embeddings for
sentences = ['The', 'stars', 'gravitate', 'towards', 'each', 'other']


#Tokenize sentences
encoded_input = bert_tokenizer(sentences, padding=True, truncation=True, max_length=128, return_tensors='pt')
encoded_input = encoded_input.to(device)

#Compute token embeddings
with torch.no_grad():
    model_output = model_e(**encoded_input)
    #model_output = model_output.to(device)
    
sentence_embeddings = model_output[0][:,0] #Take the first token ([CLS]) from each sentence 

print("Sentence embeddings:")
print(sentence_embeddings)

Sentence embeddings:
tensor([-0.1941,  0.0319,  0.2602,  0.0943,  0.4257,  0.2574], device='cuda:0')


In [24]:
# numpy implementation of argmax
from numpy import argmax

sentence_embeddings = sentence_embeddings.cpu()

# get argmax
result = argmax(sentence_embeddings)
print('arg max of %s: %d' % (sentence_embeddings, result))

arg max of tensor([-0.1941,  0.0319,  0.2602,  0.0943,  0.4257,  0.2574]): 4


#### What does the CLS token attend to?




In [25]:
# Original Loop

# Select some sentences randomly
sent_index = [0, 1, 2]

for s in sent_index:
  print("*" * 100)
  # Get the sentence's words
  tokens = test_inputs[s]
  # For each layer...
  for l in range(12):
    print("\nLayer", l+1)
    attention = np.squeeze(test_attentions[s][l].detach().cpu().numpy(), axis=0)
    # and for each head
    for h, head in enumerate(attention):
      print("Head", h+1)
      # Get the attention for the cls token
      cls_attentions = head[0]
      display(HTML(colorize(tokens, cls_attentions)))
    

Output hidden; open in https://colab.research.google.com to view.

In [26]:
# Max Pooling for all tokens in sentences and argmax

# Select some sentences randomly
sent_index = [0,1,2,3,4,5,6,7,8,9,10]

for s in sent_index:
  print("*" * 100)
  # Get the sentence's words
  tokens = test_inputs[s]
  # For each layer...
  for l in range(12):
    print("\nLayer", l+1)
    attention = np.squeeze(test_attentions[s][l].detach().cpu().numpy(), axis=0)
    # and for each head
    #print("Head", h+1)
    # Get the attention for the cls token
    encoded_tokens = bert_tokenizer(tokens, truncation=True, padding=True, max_length=128, return_tensors='pt')
    encoded_tokens = encoded_tokens.to(device)
    with torch.no_grad():
      model_output1 = model_e(**encoded_tokens)
      tokens_embeddings = max_pooling(model_output1, encoded_tokens['attention_mask'])
      tokens_embeddings = tokens_embeddings.cpu()
      cls_attentions = head[0]
      display(HTML(colorize(tokens, cls_attentions)))
      print("Tokens embeddings:")
      print(tokens_embeddings)
      arg = argmax(tokens_embeddings)
      print('arg max of %s: %d' % (tokens_embeddings, arg))

****************************************************************************************************

Layer 1


Tokens embeddings:
tensor([ 3.3173e-02,  1.3241e-02,  2.0042e-01, -1.3742e-02,  5.0742e-02,
         1.1194e+00,  2.1790e-01,  3.3441e-01,  4.7600e-01,  6.8550e-01,
         3.5810e-02,  1.7075e-01,  5.7291e-02, -7.6728e-02,  8.5365e-02,
         5.0742e-02,  1.1194e+00, -2.1220e-02,  2.7198e-01,  3.0295e-02,
         8.2871e-02,  1.2012e-01,  8.6283e-02,  1.0056e-02,  2.0042e-01,
        -1.8481e-02,  5.0742e-02,  5.2506e-02,  9.0812e-02,  6.0246e-01,
         4.3039e-01,  1.2945e-01,  5.2506e-02,  2.6615e-01,  2.9773e-01,
         6.2249e-01,  8.6283e-02,  8.6060e-01,  3.2138e-01, -2.1756e-03,
         7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02,
        -1.0000e+09,  7.7928e-02,  7.7928e-02,  7.7928e-02, -2.1756e-03,
         7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02,
        -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,
         7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02,
        -2.1756e-03,  7.7928e-02

Tokens embeddings:
tensor([ 3.3173e-02,  1.3241e-02,  2.0042e-01, -1.3742e-02,  5.0742e-02,
         1.1194e+00,  2.1790e-01,  3.3441e-01,  4.7600e-01,  6.8550e-01,
         3.5810e-02,  1.7075e-01,  5.7291e-02, -7.6728e-02,  8.5365e-02,
         5.0742e-02,  1.1194e+00, -2.1220e-02,  2.7198e-01,  3.0295e-02,
         8.2871e-02,  1.2012e-01,  8.6283e-02,  1.0056e-02,  2.0042e-01,
        -1.8481e-02,  5.0742e-02,  5.2506e-02,  9.0812e-02,  6.0246e-01,
         4.3039e-01,  1.2945e-01,  5.2506e-02,  2.6615e-01,  2.9773e-01,
         6.2249e-01,  8.6283e-02,  8.6060e-01,  3.2138e-01, -2.1756e-03,
         7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02,
        -1.0000e+09,  7.7928e-02,  7.7928e-02,  7.7928e-02, -2.1756e-03,
         7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02,
        -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,
         7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02,
        -2.1756e-03,  7.7928e-02

Tokens embeddings:
tensor([ 3.3173e-02,  1.3241e-02,  2.0042e-01, -1.3742e-02,  5.0742e-02,
         1.1194e+00,  2.1790e-01,  3.3441e-01,  4.7600e-01,  6.8550e-01,
         3.5810e-02,  1.7075e-01,  5.7291e-02, -7.6728e-02,  8.5365e-02,
         5.0742e-02,  1.1194e+00, -2.1220e-02,  2.7198e-01,  3.0295e-02,
         8.2871e-02,  1.2012e-01,  8.6283e-02,  1.0056e-02,  2.0042e-01,
        -1.8481e-02,  5.0742e-02,  5.2506e-02,  9.0812e-02,  6.0246e-01,
         4.3039e-01,  1.2945e-01,  5.2506e-02,  2.6615e-01,  2.9773e-01,
         6.2249e-01,  8.6283e-02,  8.6060e-01,  3.2138e-01, -2.1756e-03,
         7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02,
        -1.0000e+09,  7.7928e-02,  7.7928e-02,  7.7928e-02, -2.1756e-03,
         7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02,
        -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,
         7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02,
        -2.1756e-03,  7.7928e-02

Tokens embeddings:
tensor([ 3.3173e-02,  1.3241e-02,  2.0042e-01, -1.3742e-02,  5.0742e-02,
         1.1194e+00,  2.1790e-01,  3.3441e-01,  4.7600e-01,  6.8550e-01,
         3.5810e-02,  1.7075e-01,  5.7291e-02, -7.6728e-02,  8.5365e-02,
         5.0742e-02,  1.1194e+00, -2.1220e-02,  2.7198e-01,  3.0295e-02,
         8.2871e-02,  1.2012e-01,  8.6283e-02,  1.0056e-02,  2.0042e-01,
        -1.8481e-02,  5.0742e-02,  5.2506e-02,  9.0812e-02,  6.0246e-01,
         4.3039e-01,  1.2945e-01,  5.2506e-02,  2.6615e-01,  2.9773e-01,
         6.2249e-01,  8.6283e-02,  8.6060e-01,  3.2138e-01, -2.1756e-03,
         7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02,
        -1.0000e+09,  7.7928e-02,  7.7928e-02,  7.7928e-02, -2.1756e-03,
         7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02,
        -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,
         7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02,
        -2.1756e-03,  7.7928e-02

Tokens embeddings:
tensor([ 3.3173e-02,  1.3241e-02,  2.0042e-01, -1.3742e-02,  5.0742e-02,
         1.1194e+00,  2.1790e-01,  3.3441e-01,  4.7600e-01,  6.8550e-01,
         3.5810e-02,  1.7075e-01,  5.7291e-02, -7.6728e-02,  8.5365e-02,
         5.0742e-02,  1.1194e+00, -2.1220e-02,  2.7198e-01,  3.0295e-02,
         8.2871e-02,  1.2012e-01,  8.6283e-02,  1.0056e-02,  2.0042e-01,
        -1.8481e-02,  5.0742e-02,  5.2506e-02,  9.0812e-02,  6.0246e-01,
         4.3039e-01,  1.2945e-01,  5.2506e-02,  2.6615e-01,  2.9773e-01,
         6.2249e-01,  8.6283e-02,  8.6060e-01,  3.2138e-01, -2.1756e-03,
         7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02,
        -1.0000e+09,  7.7928e-02,  7.7928e-02,  7.7928e-02, -2.1756e-03,
         7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02,
        -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,
         7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02,
        -2.1756e-03,  7.7928e-02

Tokens embeddings:
tensor([ 3.3173e-02,  1.3241e-02,  2.0042e-01, -1.3742e-02,  5.0742e-02,
         1.1194e+00,  2.1790e-01,  3.3441e-01,  4.7600e-01,  6.8550e-01,
         3.5810e-02,  1.7075e-01,  5.7291e-02, -7.6728e-02,  8.5365e-02,
         5.0742e-02,  1.1194e+00, -2.1220e-02,  2.7198e-01,  3.0295e-02,
         8.2871e-02,  1.2012e-01,  8.6283e-02,  1.0056e-02,  2.0042e-01,
        -1.8481e-02,  5.0742e-02,  5.2506e-02,  9.0812e-02,  6.0246e-01,
         4.3039e-01,  1.2945e-01,  5.2506e-02,  2.6615e-01,  2.9773e-01,
         6.2249e-01,  8.6283e-02,  8.6060e-01,  3.2138e-01, -2.1756e-03,
         7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02,
        -1.0000e+09,  7.7928e-02,  7.7928e-02,  7.7928e-02, -2.1756e-03,
         7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02,
        -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,
         7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02,
        -2.1756e-03,  7.7928e-02

Tokens embeddings:
tensor([ 3.3173e-02,  1.3241e-02,  2.0042e-01, -1.3742e-02,  5.0742e-02,
         1.1194e+00,  2.1790e-01,  3.3441e-01,  4.7600e-01,  6.8550e-01,
         3.5810e-02,  1.7075e-01,  5.7291e-02, -7.6728e-02,  8.5365e-02,
         5.0742e-02,  1.1194e+00, -2.1220e-02,  2.7198e-01,  3.0295e-02,
         8.2871e-02,  1.2012e-01,  8.6283e-02,  1.0056e-02,  2.0042e-01,
        -1.8481e-02,  5.0742e-02,  5.2506e-02,  9.0812e-02,  6.0246e-01,
         4.3039e-01,  1.2945e-01,  5.2506e-02,  2.6615e-01,  2.9773e-01,
         6.2249e-01,  8.6283e-02,  8.6060e-01,  3.2138e-01, -2.1756e-03,
         7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02,
        -1.0000e+09,  7.7928e-02,  7.7928e-02,  7.7928e-02, -2.1756e-03,
         7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02,
        -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,
         7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02,
        -2.1756e-03,  7.7928e-02

Tokens embeddings:
tensor([ 3.3173e-02,  1.3241e-02,  2.0042e-01, -1.3742e-02,  5.0742e-02,
         1.1194e+00,  2.1790e-01,  3.3441e-01,  4.7600e-01,  6.8550e-01,
         3.5810e-02,  1.7075e-01,  5.7291e-02, -7.6728e-02,  8.5365e-02,
         5.0742e-02,  1.1194e+00, -2.1220e-02,  2.7198e-01,  3.0295e-02,
         8.2871e-02,  1.2012e-01,  8.6283e-02,  1.0056e-02,  2.0042e-01,
        -1.8481e-02,  5.0742e-02,  5.2506e-02,  9.0812e-02,  6.0246e-01,
         4.3039e-01,  1.2945e-01,  5.2506e-02,  2.6615e-01,  2.9773e-01,
         6.2249e-01,  8.6283e-02,  8.6060e-01,  3.2138e-01, -2.1756e-03,
         7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02,
        -1.0000e+09,  7.7928e-02,  7.7928e-02,  7.7928e-02, -2.1756e-03,
         7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02,
        -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,
         7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02,
        -2.1756e-03,  7.7928e-02

Tokens embeddings:
tensor([ 3.3173e-02,  1.3241e-02,  2.0042e-01, -1.3742e-02,  5.0742e-02,
         1.1194e+00,  2.1790e-01,  3.3441e-01,  4.7600e-01,  6.8550e-01,
         3.5810e-02,  1.7075e-01,  5.7291e-02, -7.6728e-02,  8.5365e-02,
         5.0742e-02,  1.1194e+00, -2.1220e-02,  2.7198e-01,  3.0295e-02,
         8.2871e-02,  1.2012e-01,  8.6283e-02,  1.0056e-02,  2.0042e-01,
        -1.8481e-02,  5.0742e-02,  5.2506e-02,  9.0812e-02,  6.0246e-01,
         4.3039e-01,  1.2945e-01,  5.2506e-02,  2.6615e-01,  2.9773e-01,
         6.2249e-01,  8.6283e-02,  8.6060e-01,  3.2138e-01, -2.1756e-03,
         7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02,
        -1.0000e+09,  7.7928e-02,  7.7928e-02,  7.7928e-02, -2.1756e-03,
         7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02,
        -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,
         7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02,
        -2.1756e-03,  7.7928e-02

Tokens embeddings:
tensor([ 3.3173e-02,  1.3241e-02,  2.0042e-01, -1.3742e-02,  5.0742e-02,
         1.1194e+00,  2.1790e-01,  3.3441e-01,  4.7600e-01,  6.8550e-01,
         3.5810e-02,  1.7075e-01,  5.7291e-02, -7.6728e-02,  8.5365e-02,
         5.0742e-02,  1.1194e+00, -2.1220e-02,  2.7198e-01,  3.0295e-02,
         8.2871e-02,  1.2012e-01,  8.6283e-02,  1.0056e-02,  2.0042e-01,
        -1.8481e-02,  5.0742e-02,  5.2506e-02,  9.0812e-02,  6.0246e-01,
         4.3039e-01,  1.2945e-01,  5.2506e-02,  2.6615e-01,  2.9773e-01,
         6.2249e-01,  8.6283e-02,  8.6060e-01,  3.2138e-01, -2.1756e-03,
         7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02,
        -1.0000e+09,  7.7928e-02,  7.7928e-02,  7.7928e-02, -2.1756e-03,
         7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02,
        -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,
         7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02,
        -2.1756e-03,  7.7928e-02

Tokens embeddings:
tensor([ 3.3173e-02,  1.3241e-02,  2.0042e-01, -1.3742e-02,  5.0742e-02,
         1.1194e+00,  2.1790e-01,  3.3441e-01,  4.7600e-01,  6.8550e-01,
         3.5810e-02,  1.7075e-01,  5.7291e-02, -7.6728e-02,  8.5365e-02,
         5.0742e-02,  1.1194e+00, -2.1220e-02,  2.7198e-01,  3.0295e-02,
         8.2871e-02,  1.2012e-01,  8.6283e-02,  1.0056e-02,  2.0042e-01,
        -1.8481e-02,  5.0742e-02,  5.2506e-02,  9.0812e-02,  6.0246e-01,
         4.3039e-01,  1.2945e-01,  5.2506e-02,  2.6615e-01,  2.9773e-01,
         6.2249e-01,  8.6283e-02,  8.6060e-01,  3.2138e-01, -2.1756e-03,
         7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02,
        -1.0000e+09,  7.7928e-02,  7.7928e-02,  7.7928e-02, -2.1756e-03,
         7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02,
        -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,
         7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02,
        -2.1756e-03,  7.7928e-02

Tokens embeddings:
tensor([ 3.3173e-02,  1.3241e-02,  2.0042e-01, -1.3742e-02,  5.0742e-02,
         1.1194e+00,  2.1790e-01,  3.3441e-01,  4.7600e-01,  6.8550e-01,
         3.5810e-02,  1.7075e-01,  5.7291e-02, -7.6728e-02,  8.5365e-02,
         5.0742e-02,  1.1194e+00, -2.1220e-02,  2.7198e-01,  3.0295e-02,
         8.2871e-02,  1.2012e-01,  8.6283e-02,  1.0056e-02,  2.0042e-01,
        -1.8481e-02,  5.0742e-02,  5.2506e-02,  9.0812e-02,  6.0246e-01,
         4.3039e-01,  1.2945e-01,  5.2506e-02,  2.6615e-01,  2.9773e-01,
         6.2249e-01,  8.6283e-02,  8.6060e-01,  3.2138e-01, -2.1756e-03,
         7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02,
        -1.0000e+09,  7.7928e-02,  7.7928e-02,  7.7928e-02, -2.1756e-03,
         7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02,
        -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,
         7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02,
        -2.1756e-03,  7.7928e-02

Tokens embeddings:
tensor([ 3.3173e-02,  2.8780e-02,  2.5566e-02,  1.0841e-01,  5.1002e-02,
         9.9946e-02,  4.4181e-01,  2.4700e-01,  7.9883e-02, -5.7180e-03,
         6.0246e-01,  2.0821e-01,  7.5419e-02,  3.1423e-01,  9.9946e-02,
         2.7198e-01,  2.4700e-01,  3.6226e-02,  5.2006e-02,  9.4834e-02,
         9.9567e-03, -2.7676e-02,  8.6060e-01,  3.2138e-01,  7.7928e-02,
        -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,
         7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02,
        -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,
         7.7928e-02, -2.1756e-03,  7.7928e-02, -1.0000e+09,  7.7928e-02,
         7.7928e-02,  7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,
         7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02,
        -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,
         7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02,
        -2.1756e-03,  7.7928e-02

Tokens embeddings:
tensor([ 3.3173e-02,  2.8780e-02,  2.5566e-02,  1.0841e-01,  5.1002e-02,
         9.9946e-02,  4.4181e-01,  2.4700e-01,  7.9883e-02, -5.7180e-03,
         6.0246e-01,  2.0821e-01,  7.5419e-02,  3.1423e-01,  9.9946e-02,
         2.7198e-01,  2.4700e-01,  3.6226e-02,  5.2006e-02,  9.4834e-02,
         9.9567e-03, -2.7676e-02,  8.6060e-01,  3.2138e-01,  7.7928e-02,
        -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,
         7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02,
        -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,
         7.7928e-02, -2.1756e-03,  7.7928e-02, -1.0000e+09,  7.7928e-02,
         7.7928e-02,  7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,
         7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02,
        -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,
         7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02,
        -2.1756e-03,  7.7928e-02

Tokens embeddings:
tensor([ 3.3173e-02,  2.8780e-02,  2.5566e-02,  1.0841e-01,  5.1002e-02,
         9.9946e-02,  4.4181e-01,  2.4700e-01,  7.9883e-02, -5.7180e-03,
         6.0246e-01,  2.0821e-01,  7.5419e-02,  3.1423e-01,  9.9946e-02,
         2.7198e-01,  2.4700e-01,  3.6226e-02,  5.2006e-02,  9.4834e-02,
         9.9567e-03, -2.7676e-02,  8.6060e-01,  3.2138e-01,  7.7928e-02,
        -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,
         7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02,
        -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,
         7.7928e-02, -2.1756e-03,  7.7928e-02, -1.0000e+09,  7.7928e-02,
         7.7928e-02,  7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,
         7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02,
        -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,
         7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02,
        -2.1756e-03,  7.7928e-02

Tokens embeddings:
tensor([ 3.3173e-02,  2.8780e-02,  2.5566e-02,  1.0841e-01,  5.1002e-02,
         9.9946e-02,  4.4181e-01,  2.4700e-01,  7.9883e-02, -5.7180e-03,
         6.0246e-01,  2.0821e-01,  7.5419e-02,  3.1423e-01,  9.9946e-02,
         2.7198e-01,  2.4700e-01,  3.6226e-02,  5.2006e-02,  9.4834e-02,
         9.9567e-03, -2.7676e-02,  8.6060e-01,  3.2138e-01,  7.7928e-02,
        -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,
         7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02,
        -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,
         7.7928e-02, -2.1756e-03,  7.7928e-02, -1.0000e+09,  7.7928e-02,
         7.7928e-02,  7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,
         7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02,
        -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,
         7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02,
        -2.1756e-03,  7.7928e-02

Tokens embeddings:
tensor([ 3.3173e-02,  2.8780e-02,  2.5566e-02,  1.0841e-01,  5.1002e-02,
         9.9946e-02,  4.4181e-01,  2.4700e-01,  7.9883e-02, -5.7180e-03,
         6.0246e-01,  2.0821e-01,  7.5419e-02,  3.1423e-01,  9.9946e-02,
         2.7198e-01,  2.4700e-01,  3.6226e-02,  5.2006e-02,  9.4834e-02,
         9.9567e-03, -2.7676e-02,  8.6060e-01,  3.2138e-01,  7.7928e-02,
        -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,
         7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02,
        -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,
         7.7928e-02, -2.1756e-03,  7.7928e-02, -1.0000e+09,  7.7928e-02,
         7.7928e-02,  7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,
         7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02,
        -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,
         7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02,
        -2.1756e-03,  7.7928e-02

Tokens embeddings:
tensor([ 3.3173e-02,  2.8780e-02,  2.5566e-02,  1.0841e-01,  5.1002e-02,
         9.9946e-02,  4.4181e-01,  2.4700e-01,  7.9883e-02, -5.7180e-03,
         6.0246e-01,  2.0821e-01,  7.5419e-02,  3.1423e-01,  9.9946e-02,
         2.7198e-01,  2.4700e-01,  3.6226e-02,  5.2006e-02,  9.4834e-02,
         9.9567e-03, -2.7676e-02,  8.6060e-01,  3.2138e-01,  7.7928e-02,
        -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,
         7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02,
        -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,
         7.7928e-02, -2.1756e-03,  7.7928e-02, -1.0000e+09,  7.7928e-02,
         7.7928e-02,  7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,
         7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02,
        -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,
         7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02,
        -2.1756e-03,  7.7928e-02

Tokens embeddings:
tensor([ 3.3173e-02,  2.8780e-02,  2.5566e-02,  1.0841e-01,  5.1002e-02,
         9.9946e-02,  4.4181e-01,  2.4700e-01,  7.9883e-02, -5.7180e-03,
         6.0246e-01,  2.0821e-01,  7.5419e-02,  3.1423e-01,  9.9946e-02,
         2.7198e-01,  2.4700e-01,  3.6226e-02,  5.2006e-02,  9.4834e-02,
         9.9567e-03, -2.7676e-02,  8.6060e-01,  3.2138e-01,  7.7928e-02,
        -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,
         7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02,
        -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,
         7.7928e-02, -2.1756e-03,  7.7928e-02, -1.0000e+09,  7.7928e-02,
         7.7928e-02,  7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,
         7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02,
        -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,
         7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02,
        -2.1756e-03,  7.7928e-02

Tokens embeddings:
tensor([ 3.3173e-02,  2.8780e-02,  2.5566e-02,  1.0841e-01,  5.1002e-02,
         9.9946e-02,  4.4181e-01,  2.4700e-01,  7.9883e-02, -5.7180e-03,
         6.0246e-01,  2.0821e-01,  7.5419e-02,  3.1423e-01,  9.9946e-02,
         2.7198e-01,  2.4700e-01,  3.6226e-02,  5.2006e-02,  9.4834e-02,
         9.9567e-03, -2.7676e-02,  8.6060e-01,  3.2138e-01,  7.7928e-02,
        -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,
         7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02,
        -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,
         7.7928e-02, -2.1756e-03,  7.7928e-02, -1.0000e+09,  7.7928e-02,
         7.7928e-02,  7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,
         7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02,
        -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,
         7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02,
        -2.1756e-03,  7.7928e-02

Tokens embeddings:
tensor([ 3.3173e-02,  2.8780e-02,  2.5566e-02,  1.0841e-01,  5.1002e-02,
         9.9946e-02,  4.4181e-01,  2.4700e-01,  7.9883e-02, -5.7180e-03,
         6.0246e-01,  2.0821e-01,  7.5419e-02,  3.1423e-01,  9.9946e-02,
         2.7198e-01,  2.4700e-01,  3.6226e-02,  5.2006e-02,  9.4834e-02,
         9.9567e-03, -2.7676e-02,  8.6060e-01,  3.2138e-01,  7.7928e-02,
        -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,
         7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02,
        -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,
         7.7928e-02, -2.1756e-03,  7.7928e-02, -1.0000e+09,  7.7928e-02,
         7.7928e-02,  7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,
         7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02,
        -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,
         7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02,
        -2.1756e-03,  7.7928e-02

Tokens embeddings:
tensor([ 3.3173e-02,  2.8780e-02,  2.5566e-02,  1.0841e-01,  5.1002e-02,
         9.9946e-02,  4.4181e-01,  2.4700e-01,  7.9883e-02, -5.7180e-03,
         6.0246e-01,  2.0821e-01,  7.5419e-02,  3.1423e-01,  9.9946e-02,
         2.7198e-01,  2.4700e-01,  3.6226e-02,  5.2006e-02,  9.4834e-02,
         9.9567e-03, -2.7676e-02,  8.6060e-01,  3.2138e-01,  7.7928e-02,
        -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,
         7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02,
        -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,
         7.7928e-02, -2.1756e-03,  7.7928e-02, -1.0000e+09,  7.7928e-02,
         7.7928e-02,  7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,
         7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02,
        -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,
         7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02,
        -2.1756e-03,  7.7928e-02

Tokens embeddings:
tensor([ 3.3173e-02,  2.8780e-02,  2.5566e-02,  1.0841e-01,  5.1002e-02,
         9.9946e-02,  4.4181e-01,  2.4700e-01,  7.9883e-02, -5.7180e-03,
         6.0246e-01,  2.0821e-01,  7.5419e-02,  3.1423e-01,  9.9946e-02,
         2.7198e-01,  2.4700e-01,  3.6226e-02,  5.2006e-02,  9.4834e-02,
         9.9567e-03, -2.7676e-02,  8.6060e-01,  3.2138e-01,  7.7928e-02,
        -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,
         7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02,
        -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,
         7.7928e-02, -2.1756e-03,  7.7928e-02, -1.0000e+09,  7.7928e-02,
         7.7928e-02,  7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,
         7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02,
        -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,
         7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02,
        -2.1756e-03,  7.7928e-02

Tokens embeddings:
tensor([ 3.3173e-02,  2.8780e-02,  2.5566e-02,  1.0841e-01,  5.1002e-02,
         9.9946e-02,  4.4181e-01,  2.4700e-01,  7.9883e-02, -5.7180e-03,
         6.0246e-01,  2.0821e-01,  7.5419e-02,  3.1423e-01,  9.9946e-02,
         2.7198e-01,  2.4700e-01,  3.6226e-02,  5.2006e-02,  9.4834e-02,
         9.9567e-03, -2.7676e-02,  8.6060e-01,  3.2138e-01,  7.7928e-02,
        -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,
         7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02,
        -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,
         7.7928e-02, -2.1756e-03,  7.7928e-02, -1.0000e+09,  7.7928e-02,
         7.7928e-02,  7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,
         7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02,
        -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,
         7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02,
        -2.1756e-03,  7.7928e-02

Tokens embeddings:
tensor([ 3.3173e-02, -1.9409e-01,  1.2365e-01, -1.4134e-02,  9.9946e-02,
         9.4834e-02,  2.9773e-01,  1.4855e-02,  2.4864e-02, -7.5168e-03,
         6.0391e-02, -2.7676e-02,  3.6311e-02,  1.7804e-01,  5.1477e-01,
         1.3542e-01,  7.1886e-02,  6.0246e-01,  9.4834e-02,  4.0574e-02,
         8.2784e-02,  1.6755e-01,  6.7225e-01, -1.0000e+09,  3.6311e-02,
         6.8436e-02,  2.9944e-01, -2.4761e-03,  5.8823e-02,  8.7809e-01,
         8.2697e-02,  4.8093e-01,  8.6283e-02,  1.1079e-01,  6.0246e-01,
         9.6486e-01,  4.1047e-02,  4.8093e-01,  3.2395e-02, -1.7971e-02,
         1.3566e-01, -2.7676e-02,  8.6060e-01,  3.2138e-01,  7.7928e-02,
        -2.1756e-03,  7.7928e-02, -1.0000e+09,  7.7928e-02,  7.7928e-02,
         7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02,
        -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,
         7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02,
        -1.0000e+09,  7.7928e-02

Tokens embeddings:
tensor([ 3.3173e-02, -1.9409e-01,  1.2365e-01, -1.4134e-02,  9.9946e-02,
         9.4834e-02,  2.9773e-01,  1.4855e-02,  2.4864e-02, -7.5168e-03,
         6.0391e-02, -2.7676e-02,  3.6311e-02,  1.7804e-01,  5.1477e-01,
         1.3542e-01,  7.1886e-02,  6.0246e-01,  9.4834e-02,  4.0574e-02,
         8.2784e-02,  1.6755e-01,  6.7225e-01, -1.0000e+09,  3.6311e-02,
         6.8436e-02,  2.9944e-01, -2.4761e-03,  5.8823e-02,  8.7809e-01,
         8.2697e-02,  4.8093e-01,  8.6283e-02,  1.1079e-01,  6.0246e-01,
         9.6486e-01,  4.1047e-02,  4.8093e-01,  3.2395e-02, -1.7971e-02,
         1.3566e-01, -2.7676e-02,  8.6060e-01,  3.2138e-01,  7.7928e-02,
        -2.1756e-03,  7.7928e-02, -1.0000e+09,  7.7928e-02,  7.7928e-02,
         7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02,
        -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,
         7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02,
        -1.0000e+09,  7.7928e-02

Tokens embeddings:
tensor([ 3.3173e-02, -1.9409e-01,  1.2365e-01, -1.4134e-02,  9.9946e-02,
         9.4834e-02,  2.9773e-01,  1.4855e-02,  2.4864e-02, -7.5168e-03,
         6.0391e-02, -2.7676e-02,  3.6311e-02,  1.7804e-01,  5.1477e-01,
         1.3542e-01,  7.1886e-02,  6.0246e-01,  9.4834e-02,  4.0574e-02,
         8.2784e-02,  1.6755e-01,  6.7225e-01, -1.0000e+09,  3.6311e-02,
         6.8436e-02,  2.9944e-01, -2.4761e-03,  5.8823e-02,  8.7809e-01,
         8.2697e-02,  4.8093e-01,  8.6283e-02,  1.1079e-01,  6.0246e-01,
         9.6486e-01,  4.1047e-02,  4.8093e-01,  3.2395e-02, -1.7971e-02,
         1.3566e-01, -2.7676e-02,  8.6060e-01,  3.2138e-01,  7.7928e-02,
        -2.1756e-03,  7.7928e-02, -1.0000e+09,  7.7928e-02,  7.7928e-02,
         7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02,
        -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,
         7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02,
        -1.0000e+09,  7.7928e-02

Tokens embeddings:
tensor([ 3.3173e-02, -1.9409e-01,  1.2365e-01, -1.4134e-02,  9.9946e-02,
         9.4834e-02,  2.9773e-01,  1.4855e-02,  2.4864e-02, -7.5168e-03,
         6.0391e-02, -2.7676e-02,  3.6311e-02,  1.7804e-01,  5.1477e-01,
         1.3542e-01,  7.1886e-02,  6.0246e-01,  9.4834e-02,  4.0574e-02,
         8.2784e-02,  1.6755e-01,  6.7225e-01, -1.0000e+09,  3.6311e-02,
         6.8436e-02,  2.9944e-01, -2.4761e-03,  5.8823e-02,  8.7809e-01,
         8.2697e-02,  4.8093e-01,  8.6283e-02,  1.1079e-01,  6.0246e-01,
         9.6486e-01,  4.1047e-02,  4.8093e-01,  3.2395e-02, -1.7971e-02,
         1.3566e-01, -2.7676e-02,  8.6060e-01,  3.2138e-01,  7.7928e-02,
        -2.1756e-03,  7.7928e-02, -1.0000e+09,  7.7928e-02,  7.7928e-02,
         7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02,
        -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,
         7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02,
        -1.0000e+09,  7.7928e-02

Tokens embeddings:
tensor([ 3.3173e-02, -1.9409e-01,  1.2365e-01, -1.4134e-02,  9.9946e-02,
         9.4834e-02,  2.9773e-01,  1.4855e-02,  2.4864e-02, -7.5168e-03,
         6.0391e-02, -2.7676e-02,  3.6311e-02,  1.7804e-01,  5.1477e-01,
         1.3542e-01,  7.1886e-02,  6.0246e-01,  9.4834e-02,  4.0574e-02,
         8.2784e-02,  1.6755e-01,  6.7225e-01, -1.0000e+09,  3.6311e-02,
         6.8436e-02,  2.9944e-01, -2.4761e-03,  5.8823e-02,  8.7809e-01,
         8.2697e-02,  4.8093e-01,  8.6283e-02,  1.1079e-01,  6.0246e-01,
         9.6486e-01,  4.1047e-02,  4.8093e-01,  3.2395e-02, -1.7971e-02,
         1.3566e-01, -2.7676e-02,  8.6060e-01,  3.2138e-01,  7.7928e-02,
        -2.1756e-03,  7.7928e-02, -1.0000e+09,  7.7928e-02,  7.7928e-02,
         7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02,
        -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,
         7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02,
        -1.0000e+09,  7.7928e-02

Tokens embeddings:
tensor([ 3.3173e-02, -1.9409e-01,  1.2365e-01, -1.4134e-02,  9.9946e-02,
         9.4834e-02,  2.9773e-01,  1.4855e-02,  2.4864e-02, -7.5168e-03,
         6.0391e-02, -2.7676e-02,  3.6311e-02,  1.7804e-01,  5.1477e-01,
         1.3542e-01,  7.1886e-02,  6.0246e-01,  9.4834e-02,  4.0574e-02,
         8.2784e-02,  1.6755e-01,  6.7225e-01, -1.0000e+09,  3.6311e-02,
         6.8436e-02,  2.9944e-01, -2.4761e-03,  5.8823e-02,  8.7809e-01,
         8.2697e-02,  4.8093e-01,  8.6283e-02,  1.1079e-01,  6.0246e-01,
         9.6486e-01,  4.1047e-02,  4.8093e-01,  3.2395e-02, -1.7971e-02,
         1.3566e-01, -2.7676e-02,  8.6060e-01,  3.2138e-01,  7.7928e-02,
        -2.1756e-03,  7.7928e-02, -1.0000e+09,  7.7928e-02,  7.7928e-02,
         7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02,
        -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,
         7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02,
        -1.0000e+09,  7.7928e-02

Tokens embeddings:
tensor([ 3.3173e-02, -1.9409e-01,  1.2365e-01, -1.4134e-02,  9.9946e-02,
         9.4834e-02,  2.9773e-01,  1.4855e-02,  2.4864e-02, -7.5168e-03,
         6.0391e-02, -2.7676e-02,  3.6311e-02,  1.7804e-01,  5.1477e-01,
         1.3542e-01,  7.1886e-02,  6.0246e-01,  9.4834e-02,  4.0574e-02,
         8.2784e-02,  1.6755e-01,  6.7225e-01, -1.0000e+09,  3.6311e-02,
         6.8436e-02,  2.9944e-01, -2.4761e-03,  5.8823e-02,  8.7809e-01,
         8.2697e-02,  4.8093e-01,  8.6283e-02,  1.1079e-01,  6.0246e-01,
         9.6486e-01,  4.1047e-02,  4.8093e-01,  3.2395e-02, -1.7971e-02,
         1.3566e-01, -2.7676e-02,  8.6060e-01,  3.2138e-01,  7.7928e-02,
        -2.1756e-03,  7.7928e-02, -1.0000e+09,  7.7928e-02,  7.7928e-02,
         7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02,
        -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,
         7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02,
        -1.0000e+09,  7.7928e-02

Tokens embeddings:
tensor([ 3.3173e-02, -1.9409e-01,  1.2365e-01, -1.4134e-02,  9.9946e-02,
         9.4834e-02,  2.9773e-01,  1.4855e-02,  2.4864e-02, -7.5168e-03,
         6.0391e-02, -2.7676e-02,  3.6311e-02,  1.7804e-01,  5.1477e-01,
         1.3542e-01,  7.1886e-02,  6.0246e-01,  9.4834e-02,  4.0574e-02,
         8.2784e-02,  1.6755e-01,  6.7225e-01, -1.0000e+09,  3.6311e-02,
         6.8436e-02,  2.9944e-01, -2.4761e-03,  5.8823e-02,  8.7809e-01,
         8.2697e-02,  4.8093e-01,  8.6283e-02,  1.1079e-01,  6.0246e-01,
         9.6486e-01,  4.1047e-02,  4.8093e-01,  3.2395e-02, -1.7971e-02,
         1.3566e-01, -2.7676e-02,  8.6060e-01,  3.2138e-01,  7.7928e-02,
        -2.1756e-03,  7.7928e-02, -1.0000e+09,  7.7928e-02,  7.7928e-02,
         7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02,
        -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,
         7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02,
        -1.0000e+09,  7.7928e-02

Tokens embeddings:
tensor([ 3.3173e-02, -1.9409e-01,  1.2365e-01, -1.4134e-02,  9.9946e-02,
         9.4834e-02,  2.9773e-01,  1.4855e-02,  2.4864e-02, -7.5168e-03,
         6.0391e-02, -2.7676e-02,  3.6311e-02,  1.7804e-01,  5.1477e-01,
         1.3542e-01,  7.1886e-02,  6.0246e-01,  9.4834e-02,  4.0574e-02,
         8.2784e-02,  1.6755e-01,  6.7225e-01, -1.0000e+09,  3.6311e-02,
         6.8436e-02,  2.9944e-01, -2.4761e-03,  5.8823e-02,  8.7809e-01,
         8.2697e-02,  4.8093e-01,  8.6283e-02,  1.1079e-01,  6.0246e-01,
         9.6486e-01,  4.1047e-02,  4.8093e-01,  3.2395e-02, -1.7971e-02,
         1.3566e-01, -2.7676e-02,  8.6060e-01,  3.2138e-01,  7.7928e-02,
        -2.1756e-03,  7.7928e-02, -1.0000e+09,  7.7928e-02,  7.7928e-02,
         7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02,
        -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,
         7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02,
        -1.0000e+09,  7.7928e-02

Tokens embeddings:
tensor([ 3.3173e-02, -1.9409e-01,  1.2365e-01, -1.4134e-02,  9.9946e-02,
         9.4834e-02,  2.9773e-01,  1.4855e-02,  2.4864e-02, -7.5168e-03,
         6.0391e-02, -2.7676e-02,  3.6311e-02,  1.7804e-01,  5.1477e-01,
         1.3542e-01,  7.1886e-02,  6.0246e-01,  9.4834e-02,  4.0574e-02,
         8.2784e-02,  1.6755e-01,  6.7225e-01, -1.0000e+09,  3.6311e-02,
         6.8436e-02,  2.9944e-01, -2.4761e-03,  5.8823e-02,  8.7809e-01,
         8.2697e-02,  4.8093e-01,  8.6283e-02,  1.1079e-01,  6.0246e-01,
         9.6486e-01,  4.1047e-02,  4.8093e-01,  3.2395e-02, -1.7971e-02,
         1.3566e-01, -2.7676e-02,  8.6060e-01,  3.2138e-01,  7.7928e-02,
        -2.1756e-03,  7.7928e-02, -1.0000e+09,  7.7928e-02,  7.7928e-02,
         7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02,
        -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,
         7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02,
        -1.0000e+09,  7.7928e-02

Tokens embeddings:
tensor([ 3.3173e-02, -1.9409e-01,  1.2365e-01, -1.4134e-02,  9.9946e-02,
         9.4834e-02,  2.9773e-01,  1.4855e-02,  2.4864e-02, -7.5168e-03,
         6.0391e-02, -2.7676e-02,  3.6311e-02,  1.7804e-01,  5.1477e-01,
         1.3542e-01,  7.1886e-02,  6.0246e-01,  9.4834e-02,  4.0574e-02,
         8.2784e-02,  1.6755e-01,  6.7225e-01, -1.0000e+09,  3.6311e-02,
         6.8436e-02,  2.9944e-01, -2.4761e-03,  5.8823e-02,  8.7809e-01,
         8.2697e-02,  4.8093e-01,  8.6283e-02,  1.1079e-01,  6.0246e-01,
         9.6486e-01,  4.1047e-02,  4.8093e-01,  3.2395e-02, -1.7971e-02,
         1.3566e-01, -2.7676e-02,  8.6060e-01,  3.2138e-01,  7.7928e-02,
        -2.1756e-03,  7.7928e-02, -1.0000e+09,  7.7928e-02,  7.7928e-02,
         7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02,
        -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,
         7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02,
        -1.0000e+09,  7.7928e-02

Tokens embeddings:
tensor([ 3.3173e-02, -1.9409e-01,  1.2365e-01, -1.4134e-02,  9.9946e-02,
         9.4834e-02,  2.9773e-01,  1.4855e-02,  2.4864e-02, -7.5168e-03,
         6.0391e-02, -2.7676e-02,  3.6311e-02,  1.7804e-01,  5.1477e-01,
         1.3542e-01,  7.1886e-02,  6.0246e-01,  9.4834e-02,  4.0574e-02,
         8.2784e-02,  1.6755e-01,  6.7225e-01, -1.0000e+09,  3.6311e-02,
         6.8436e-02,  2.9944e-01, -2.4761e-03,  5.8823e-02,  8.7809e-01,
         8.2697e-02,  4.8093e-01,  8.6283e-02,  1.1079e-01,  6.0246e-01,
         9.6486e-01,  4.1047e-02,  4.8093e-01,  3.2395e-02, -1.7971e-02,
         1.3566e-01, -2.7676e-02,  8.6060e-01,  3.2138e-01,  7.7928e-02,
        -2.1756e-03,  7.7928e-02, -1.0000e+09,  7.7928e-02,  7.7928e-02,
         7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02,
        -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,
         7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02,
        -1.0000e+09,  7.7928e-02

Tokens embeddings:
tensor([ 3.3173e-02,  3.9825e-01,  4.4165e-02,  1.1194e+00,  2.9773e-01,
         3.0679e-01,  6.1045e-01, -4.4630e-03,  5.9357e-01,  1.5389e-01,
         7.5017e-02,  2.4712e-02,  2.6615e-01,  4.0848e-01,  2.9773e-01,
        -4.0521e-02,  2.6578e-01,  8.8600e-02,  6.0775e-02,  1.8884e-02,
         4.2367e-01,  1.9652e-03,  8.6283e-02,  1.0056e-02,  1.4351e-01,
         3.7169e-02,  7.1913e-02, -2.7676e-02,  3.6311e-02,  2.1781e-01,
         1.0359e-01,  4.4977e-01,  2.9773e-01, -7.7107e-02,  1.4900e-02,
        -8.3726e-02,  1.3231e+00,  6.0246e-01,  5.0365e-01,  4.4977e-01,
         2.6547e-01, -5.8958e-02,  5.9766e-02, -2.1948e-02,  8.3316e-02,
        -1.0000e+09,  1.1388e-02,  8.6283e-02,  8.6060e-01,  3.2138e-01,
         7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02,
        -1.0000e+09,  7.7928e-02,  7.7928e-02,  7.7928e-02, -2.1756e-03,
         7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02,
        -2.1756e-03,  7.7928e-02

Tokens embeddings:
tensor([ 3.3173e-02,  3.9825e-01,  4.4165e-02,  1.1194e+00,  2.9773e-01,
         3.0679e-01,  6.1045e-01, -4.4630e-03,  5.9357e-01,  1.5389e-01,
         7.5017e-02,  2.4712e-02,  2.6615e-01,  4.0848e-01,  2.9773e-01,
        -4.0521e-02,  2.6578e-01,  8.8600e-02,  6.0775e-02,  1.8884e-02,
         4.2367e-01,  1.9652e-03,  8.6283e-02,  1.0056e-02,  1.4351e-01,
         3.7169e-02,  7.1913e-02, -2.7676e-02,  3.6311e-02,  2.1781e-01,
         1.0359e-01,  4.4977e-01,  2.9773e-01, -7.7107e-02,  1.4900e-02,
        -8.3726e-02,  1.3231e+00,  6.0246e-01,  5.0365e-01,  4.4977e-01,
         2.6547e-01, -5.8958e-02,  5.9766e-02, -2.1948e-02,  8.3316e-02,
        -1.0000e+09,  1.1388e-02,  8.6283e-02,  8.6060e-01,  3.2138e-01,
         7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02,
        -1.0000e+09,  7.7928e-02,  7.7928e-02,  7.7928e-02, -2.1756e-03,
         7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02,
        -2.1756e-03,  7.7928e-02

Tokens embeddings:
tensor([ 3.3173e-02,  3.9825e-01,  4.4165e-02,  1.1194e+00,  2.9773e-01,
         3.0679e-01,  6.1045e-01, -4.4630e-03,  5.9357e-01,  1.5389e-01,
         7.5017e-02,  2.4712e-02,  2.6615e-01,  4.0848e-01,  2.9773e-01,
        -4.0521e-02,  2.6578e-01,  8.8600e-02,  6.0775e-02,  1.8884e-02,
         4.2367e-01,  1.9652e-03,  8.6283e-02,  1.0056e-02,  1.4351e-01,
         3.7169e-02,  7.1913e-02, -2.7676e-02,  3.6311e-02,  2.1781e-01,
         1.0359e-01,  4.4977e-01,  2.9773e-01, -7.7107e-02,  1.4900e-02,
        -8.3726e-02,  1.3231e+00,  6.0246e-01,  5.0365e-01,  4.4977e-01,
         2.6547e-01, -5.8958e-02,  5.9766e-02, -2.1948e-02,  8.3316e-02,
        -1.0000e+09,  1.1388e-02,  8.6283e-02,  8.6060e-01,  3.2138e-01,
         7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02,
        -1.0000e+09,  7.7928e-02,  7.7928e-02,  7.7928e-02, -2.1756e-03,
         7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02,
        -2.1756e-03,  7.7928e-02

Tokens embeddings:
tensor([ 3.3173e-02,  3.9825e-01,  4.4165e-02,  1.1194e+00,  2.9773e-01,
         3.0679e-01,  6.1045e-01, -4.4630e-03,  5.9357e-01,  1.5389e-01,
         7.5017e-02,  2.4712e-02,  2.6615e-01,  4.0848e-01,  2.9773e-01,
        -4.0521e-02,  2.6578e-01,  8.8600e-02,  6.0775e-02,  1.8884e-02,
         4.2367e-01,  1.9652e-03,  8.6283e-02,  1.0056e-02,  1.4351e-01,
         3.7169e-02,  7.1913e-02, -2.7676e-02,  3.6311e-02,  2.1781e-01,
         1.0359e-01,  4.4977e-01,  2.9773e-01, -7.7107e-02,  1.4900e-02,
        -8.3726e-02,  1.3231e+00,  6.0246e-01,  5.0365e-01,  4.4977e-01,
         2.6547e-01, -5.8958e-02,  5.9766e-02, -2.1948e-02,  8.3316e-02,
        -1.0000e+09,  1.1388e-02,  8.6283e-02,  8.6060e-01,  3.2138e-01,
         7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02,
        -1.0000e+09,  7.7928e-02,  7.7928e-02,  7.7928e-02, -2.1756e-03,
         7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02,
        -2.1756e-03,  7.7928e-02

Tokens embeddings:
tensor([ 3.3173e-02,  3.9825e-01,  4.4165e-02,  1.1194e+00,  2.9773e-01,
         3.0679e-01,  6.1045e-01, -4.4630e-03,  5.9357e-01,  1.5389e-01,
         7.5017e-02,  2.4712e-02,  2.6615e-01,  4.0848e-01,  2.9773e-01,
        -4.0521e-02,  2.6578e-01,  8.8600e-02,  6.0775e-02,  1.8884e-02,
         4.2367e-01,  1.9652e-03,  8.6283e-02,  1.0056e-02,  1.4351e-01,
         3.7169e-02,  7.1913e-02, -2.7676e-02,  3.6311e-02,  2.1781e-01,
         1.0359e-01,  4.4977e-01,  2.9773e-01, -7.7107e-02,  1.4900e-02,
        -8.3726e-02,  1.3231e+00,  6.0246e-01,  5.0365e-01,  4.4977e-01,
         2.6547e-01, -5.8958e-02,  5.9766e-02, -2.1948e-02,  8.3316e-02,
        -1.0000e+09,  1.1388e-02,  8.6283e-02,  8.6060e-01,  3.2138e-01,
         7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02,
        -1.0000e+09,  7.7928e-02,  7.7928e-02,  7.7928e-02, -2.1756e-03,
         7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02,
        -2.1756e-03,  7.7928e-02

Tokens embeddings:
tensor([ 3.3173e-02,  3.9825e-01,  4.4165e-02,  1.1194e+00,  2.9773e-01,
         3.0679e-01,  6.1045e-01, -4.4630e-03,  5.9357e-01,  1.5389e-01,
         7.5017e-02,  2.4712e-02,  2.6615e-01,  4.0848e-01,  2.9773e-01,
        -4.0521e-02,  2.6578e-01,  8.8600e-02,  6.0775e-02,  1.8884e-02,
         4.2367e-01,  1.9652e-03,  8.6283e-02,  1.0056e-02,  1.4351e-01,
         3.7169e-02,  7.1913e-02, -2.7676e-02,  3.6311e-02,  2.1781e-01,
         1.0359e-01,  4.4977e-01,  2.9773e-01, -7.7107e-02,  1.4900e-02,
        -8.3726e-02,  1.3231e+00,  6.0246e-01,  5.0365e-01,  4.4977e-01,
         2.6547e-01, -5.8958e-02,  5.9766e-02, -2.1948e-02,  8.3316e-02,
        -1.0000e+09,  1.1388e-02,  8.6283e-02,  8.6060e-01,  3.2138e-01,
         7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02,
        -1.0000e+09,  7.7928e-02,  7.7928e-02,  7.7928e-02, -2.1756e-03,
         7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02,
        -2.1756e-03,  7.7928e-02

Tokens embeddings:
tensor([ 3.3173e-02,  3.9825e-01,  4.4165e-02,  1.1194e+00,  2.9773e-01,
         3.0679e-01,  6.1045e-01, -4.4630e-03,  5.9357e-01,  1.5389e-01,
         7.5017e-02,  2.4712e-02,  2.6615e-01,  4.0848e-01,  2.9773e-01,
        -4.0521e-02,  2.6578e-01,  8.8600e-02,  6.0775e-02,  1.8884e-02,
         4.2367e-01,  1.9652e-03,  8.6283e-02,  1.0056e-02,  1.4351e-01,
         3.7169e-02,  7.1913e-02, -2.7676e-02,  3.6311e-02,  2.1781e-01,
         1.0359e-01,  4.4977e-01,  2.9773e-01, -7.7107e-02,  1.4900e-02,
        -8.3726e-02,  1.3231e+00,  6.0246e-01,  5.0365e-01,  4.4977e-01,
         2.6547e-01, -5.8958e-02,  5.9766e-02, -2.1948e-02,  8.3316e-02,
        -1.0000e+09,  1.1388e-02,  8.6283e-02,  8.6060e-01,  3.2138e-01,
         7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02,
        -1.0000e+09,  7.7928e-02,  7.7928e-02,  7.7928e-02, -2.1756e-03,
         7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02,
        -2.1756e-03,  7.7928e-02

Tokens embeddings:
tensor([ 3.3173e-02,  3.9825e-01,  4.4165e-02,  1.1194e+00,  2.9773e-01,
         3.0679e-01,  6.1045e-01, -4.4630e-03,  5.9357e-01,  1.5389e-01,
         7.5017e-02,  2.4712e-02,  2.6615e-01,  4.0848e-01,  2.9773e-01,
        -4.0521e-02,  2.6578e-01,  8.8600e-02,  6.0775e-02,  1.8884e-02,
         4.2367e-01,  1.9652e-03,  8.6283e-02,  1.0056e-02,  1.4351e-01,
         3.7169e-02,  7.1913e-02, -2.7676e-02,  3.6311e-02,  2.1781e-01,
         1.0359e-01,  4.4977e-01,  2.9773e-01, -7.7107e-02,  1.4900e-02,
        -8.3726e-02,  1.3231e+00,  6.0246e-01,  5.0365e-01,  4.4977e-01,
         2.6547e-01, -5.8958e-02,  5.9766e-02, -2.1948e-02,  8.3316e-02,
        -1.0000e+09,  1.1388e-02,  8.6283e-02,  8.6060e-01,  3.2138e-01,
         7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02,
        -1.0000e+09,  7.7928e-02,  7.7928e-02,  7.7928e-02, -2.1756e-03,
         7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02,
        -2.1756e-03,  7.7928e-02

Tokens embeddings:
tensor([ 3.3173e-02,  3.9825e-01,  4.4165e-02,  1.1194e+00,  2.9773e-01,
         3.0679e-01,  6.1045e-01, -4.4630e-03,  5.9357e-01,  1.5389e-01,
         7.5017e-02,  2.4712e-02,  2.6615e-01,  4.0848e-01,  2.9773e-01,
        -4.0521e-02,  2.6578e-01,  8.8600e-02,  6.0775e-02,  1.8884e-02,
         4.2367e-01,  1.9652e-03,  8.6283e-02,  1.0056e-02,  1.4351e-01,
         3.7169e-02,  7.1913e-02, -2.7676e-02,  3.6311e-02,  2.1781e-01,
         1.0359e-01,  4.4977e-01,  2.9773e-01, -7.7107e-02,  1.4900e-02,
        -8.3726e-02,  1.3231e+00,  6.0246e-01,  5.0365e-01,  4.4977e-01,
         2.6547e-01, -5.8958e-02,  5.9766e-02, -2.1948e-02,  8.3316e-02,
        -1.0000e+09,  1.1388e-02,  8.6283e-02,  8.6060e-01,  3.2138e-01,
         7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02,
        -1.0000e+09,  7.7928e-02,  7.7928e-02,  7.7928e-02, -2.1756e-03,
         7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02,
        -2.1756e-03,  7.7928e-02

Tokens embeddings:
tensor([ 3.3173e-02,  3.9825e-01,  4.4165e-02,  1.1194e+00,  2.9773e-01,
         3.0679e-01,  6.1045e-01, -4.4630e-03,  5.9357e-01,  1.5389e-01,
         7.5017e-02,  2.4712e-02,  2.6615e-01,  4.0848e-01,  2.9773e-01,
        -4.0521e-02,  2.6578e-01,  8.8600e-02,  6.0775e-02,  1.8884e-02,
         4.2367e-01,  1.9652e-03,  8.6283e-02,  1.0056e-02,  1.4351e-01,
         3.7169e-02,  7.1913e-02, -2.7676e-02,  3.6311e-02,  2.1781e-01,
         1.0359e-01,  4.4977e-01,  2.9773e-01, -7.7107e-02,  1.4900e-02,
        -8.3726e-02,  1.3231e+00,  6.0246e-01,  5.0365e-01,  4.4977e-01,
         2.6547e-01, -5.8958e-02,  5.9766e-02, -2.1948e-02,  8.3316e-02,
        -1.0000e+09,  1.1388e-02,  8.6283e-02,  8.6060e-01,  3.2138e-01,
         7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02,
        -1.0000e+09,  7.7928e-02,  7.7928e-02,  7.7928e-02, -2.1756e-03,
         7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02,
        -2.1756e-03,  7.7928e-02

Tokens embeddings:
tensor([ 3.3173e-02,  3.9825e-01,  4.4165e-02,  1.1194e+00,  2.9773e-01,
         3.0679e-01,  6.1045e-01, -4.4630e-03,  5.9357e-01,  1.5389e-01,
         7.5017e-02,  2.4712e-02,  2.6615e-01,  4.0848e-01,  2.9773e-01,
        -4.0521e-02,  2.6578e-01,  8.8600e-02,  6.0775e-02,  1.8884e-02,
         4.2367e-01,  1.9652e-03,  8.6283e-02,  1.0056e-02,  1.4351e-01,
         3.7169e-02,  7.1913e-02, -2.7676e-02,  3.6311e-02,  2.1781e-01,
         1.0359e-01,  4.4977e-01,  2.9773e-01, -7.7107e-02,  1.4900e-02,
        -8.3726e-02,  1.3231e+00,  6.0246e-01,  5.0365e-01,  4.4977e-01,
         2.6547e-01, -5.8958e-02,  5.9766e-02, -2.1948e-02,  8.3316e-02,
        -1.0000e+09,  1.1388e-02,  8.6283e-02,  8.6060e-01,  3.2138e-01,
         7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02,
        -1.0000e+09,  7.7928e-02,  7.7928e-02,  7.7928e-02, -2.1756e-03,
         7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02,
        -2.1756e-03,  7.7928e-02

Tokens embeddings:
tensor([ 3.3173e-02,  3.9825e-01,  4.4165e-02,  1.1194e+00,  2.9773e-01,
         3.0679e-01,  6.1045e-01, -4.4630e-03,  5.9357e-01,  1.5389e-01,
         7.5017e-02,  2.4712e-02,  2.6615e-01,  4.0848e-01,  2.9773e-01,
        -4.0521e-02,  2.6578e-01,  8.8600e-02,  6.0775e-02,  1.8884e-02,
         4.2367e-01,  1.9652e-03,  8.6283e-02,  1.0056e-02,  1.4351e-01,
         3.7169e-02,  7.1913e-02, -2.7676e-02,  3.6311e-02,  2.1781e-01,
         1.0359e-01,  4.4977e-01,  2.9773e-01, -7.7107e-02,  1.4900e-02,
        -8.3726e-02,  1.3231e+00,  6.0246e-01,  5.0365e-01,  4.4977e-01,
         2.6547e-01, -5.8958e-02,  5.9766e-02, -2.1948e-02,  8.3316e-02,
        -1.0000e+09,  1.1388e-02,  8.6283e-02,  8.6060e-01,  3.2138e-01,
         7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02,
        -1.0000e+09,  7.7928e-02,  7.7928e-02,  7.7928e-02, -2.1756e-03,
         7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02,
        -2.1756e-03,  7.7928e-02

Tokens embeddings:
tensor([ 3.3173e-02, -5.4577e-02,  3.4653e-02,  4.3952e-01,  5.0742e-02,
         2.9773e-01,  2.5037e-02,  9.7216e-02,  4.4977e-01,  6.1428e-02,
         6.8972e-02, -2.0589e-02,  1.5666e-01, -2.1518e-03,  4.4977e-01,
        -1.0218e-02,  6.6124e-03,  1.0792e-01,  8.6283e-02,  8.6060e-01,
         7.5139e-02,  3.2138e-01,  7.7928e-02, -2.1756e-03,  7.7928e-02,
        -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,
         7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02,
        -2.1756e-03,  7.7928e-02, -1.0000e+09,  7.7928e-02,  7.7928e-02,
         7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02,
        -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,
         7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02,
        -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,
         7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02,
        -2.1756e-03,  7.7928e-02

Tokens embeddings:
tensor([ 3.3173e-02, -5.4577e-02,  3.4653e-02,  4.3952e-01,  5.0742e-02,
         2.9773e-01,  2.5037e-02,  9.7216e-02,  4.4977e-01,  6.1428e-02,
         6.8972e-02, -2.0589e-02,  1.5666e-01, -2.1518e-03,  4.4977e-01,
        -1.0218e-02,  6.6124e-03,  1.0792e-01,  8.6283e-02,  8.6060e-01,
         7.5139e-02,  3.2138e-01,  7.7928e-02, -2.1756e-03,  7.7928e-02,
        -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,
         7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02,
        -2.1756e-03,  7.7928e-02, -1.0000e+09,  7.7928e-02,  7.7928e-02,
         7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02,
        -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,
         7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02,
        -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,
         7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02,
        -2.1756e-03,  7.7928e-02

Tokens embeddings:
tensor([ 3.3173e-02, -5.4577e-02,  3.4653e-02,  4.3952e-01,  5.0742e-02,
         2.9773e-01,  2.5037e-02,  9.7216e-02,  4.4977e-01,  6.1428e-02,
         6.8972e-02, -2.0589e-02,  1.5666e-01, -2.1518e-03,  4.4977e-01,
        -1.0218e-02,  6.6124e-03,  1.0792e-01,  8.6283e-02,  8.6060e-01,
         7.5139e-02,  3.2138e-01,  7.7928e-02, -2.1756e-03,  7.7928e-02,
        -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,
         7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02,
        -2.1756e-03,  7.7928e-02, -1.0000e+09,  7.7928e-02,  7.7928e-02,
         7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02,
        -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,
         7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02,
        -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,
         7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02,
        -2.1756e-03,  7.7928e-02

Tokens embeddings:
tensor([ 3.3173e-02, -5.4577e-02,  3.4653e-02,  4.3952e-01,  5.0742e-02,
         2.9773e-01,  2.5037e-02,  9.7216e-02,  4.4977e-01,  6.1428e-02,
         6.8972e-02, -2.0589e-02,  1.5666e-01, -2.1518e-03,  4.4977e-01,
        -1.0218e-02,  6.6124e-03,  1.0792e-01,  8.6283e-02,  8.6060e-01,
         7.5139e-02,  3.2138e-01,  7.7928e-02, -2.1756e-03,  7.7928e-02,
        -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,
         7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02,
        -2.1756e-03,  7.7928e-02, -1.0000e+09,  7.7928e-02,  7.7928e-02,
         7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02,
        -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,
         7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02,
        -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,
         7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02,
        -2.1756e-03,  7.7928e-02

Tokens embeddings:
tensor([ 3.3173e-02, -5.4577e-02,  3.4653e-02,  4.3952e-01,  5.0742e-02,
         2.9773e-01,  2.5037e-02,  9.7216e-02,  4.4977e-01,  6.1428e-02,
         6.8972e-02, -2.0589e-02,  1.5666e-01, -2.1518e-03,  4.4977e-01,
        -1.0218e-02,  6.6124e-03,  1.0792e-01,  8.6283e-02,  8.6060e-01,
         7.5139e-02,  3.2138e-01,  7.7928e-02, -2.1756e-03,  7.7928e-02,
        -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,
         7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02,
        -2.1756e-03,  7.7928e-02, -1.0000e+09,  7.7928e-02,  7.7928e-02,
         7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02,
        -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,
         7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02,
        -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,
         7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02,
        -2.1756e-03,  7.7928e-02

Tokens embeddings:
tensor([ 3.3173e-02, -5.4577e-02,  3.4653e-02,  4.3952e-01,  5.0742e-02,
         2.9773e-01,  2.5037e-02,  9.7216e-02,  4.4977e-01,  6.1428e-02,
         6.8972e-02, -2.0589e-02,  1.5666e-01, -2.1518e-03,  4.4977e-01,
        -1.0218e-02,  6.6124e-03,  1.0792e-01,  8.6283e-02,  8.6060e-01,
         7.5139e-02,  3.2138e-01,  7.7928e-02, -2.1756e-03,  7.7928e-02,
        -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,
         7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02,
        -2.1756e-03,  7.7928e-02, -1.0000e+09,  7.7928e-02,  7.7928e-02,
         7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02,
        -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,
         7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02,
        -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,
         7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02,
        -2.1756e-03,  7.7928e-02

Tokens embeddings:
tensor([ 3.3173e-02, -5.4577e-02,  3.4653e-02,  4.3952e-01,  5.0742e-02,
         2.9773e-01,  2.5037e-02,  9.7216e-02,  4.4977e-01,  6.1428e-02,
         6.8972e-02, -2.0589e-02,  1.5666e-01, -2.1518e-03,  4.4977e-01,
        -1.0218e-02,  6.6124e-03,  1.0792e-01,  8.6283e-02,  8.6060e-01,
         7.5139e-02,  3.2138e-01,  7.7928e-02, -2.1756e-03,  7.7928e-02,
        -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,
         7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02,
        -2.1756e-03,  7.7928e-02, -1.0000e+09,  7.7928e-02,  7.7928e-02,
         7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02,
        -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,
         7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02,
        -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,
         7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02,
        -2.1756e-03,  7.7928e-02

Tokens embeddings:
tensor([ 3.3173e-02, -5.4577e-02,  3.4653e-02,  4.3952e-01,  5.0742e-02,
         2.9773e-01,  2.5037e-02,  9.7216e-02,  4.4977e-01,  6.1428e-02,
         6.8972e-02, -2.0589e-02,  1.5666e-01, -2.1518e-03,  4.4977e-01,
        -1.0218e-02,  6.6124e-03,  1.0792e-01,  8.6283e-02,  8.6060e-01,
         7.5139e-02,  3.2138e-01,  7.7928e-02, -2.1756e-03,  7.7928e-02,
        -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,
         7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02,
        -2.1756e-03,  7.7928e-02, -1.0000e+09,  7.7928e-02,  7.7928e-02,
         7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02,
        -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,
         7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02,
        -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,
         7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02,
        -2.1756e-03,  7.7928e-02

Tokens embeddings:
tensor([ 3.3173e-02, -5.4577e-02,  3.4653e-02,  4.3952e-01,  5.0742e-02,
         2.9773e-01,  2.5037e-02,  9.7216e-02,  4.4977e-01,  6.1428e-02,
         6.8972e-02, -2.0589e-02,  1.5666e-01, -2.1518e-03,  4.4977e-01,
        -1.0218e-02,  6.6124e-03,  1.0792e-01,  8.6283e-02,  8.6060e-01,
         7.5139e-02,  3.2138e-01,  7.7928e-02, -2.1756e-03,  7.7928e-02,
        -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,
         7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02,
        -2.1756e-03,  7.7928e-02, -1.0000e+09,  7.7928e-02,  7.7928e-02,
         7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02,
        -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,
         7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02,
        -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,
         7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02,
        -2.1756e-03,  7.7928e-02

Tokens embeddings:
tensor([ 3.3173e-02, -5.4577e-02,  3.4653e-02,  4.3952e-01,  5.0742e-02,
         2.9773e-01,  2.5037e-02,  9.7216e-02,  4.4977e-01,  6.1428e-02,
         6.8972e-02, -2.0589e-02,  1.5666e-01, -2.1518e-03,  4.4977e-01,
        -1.0218e-02,  6.6124e-03,  1.0792e-01,  8.6283e-02,  8.6060e-01,
         7.5139e-02,  3.2138e-01,  7.7928e-02, -2.1756e-03,  7.7928e-02,
        -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,
         7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02,
        -2.1756e-03,  7.7928e-02, -1.0000e+09,  7.7928e-02,  7.7928e-02,
         7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02,
        -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,
         7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02,
        -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,
         7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02,
        -2.1756e-03,  7.7928e-02

Tokens embeddings:
tensor([ 3.3173e-02, -5.4577e-02,  3.4653e-02,  4.3952e-01,  5.0742e-02,
         2.9773e-01,  2.5037e-02,  9.7216e-02,  4.4977e-01,  6.1428e-02,
         6.8972e-02, -2.0589e-02,  1.5666e-01, -2.1518e-03,  4.4977e-01,
        -1.0218e-02,  6.6124e-03,  1.0792e-01,  8.6283e-02,  8.6060e-01,
         7.5139e-02,  3.2138e-01,  7.7928e-02, -2.1756e-03,  7.7928e-02,
        -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,
         7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02,
        -2.1756e-03,  7.7928e-02, -1.0000e+09,  7.7928e-02,  7.7928e-02,
         7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02,
        -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,
         7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02,
        -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,
         7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02,
        -2.1756e-03,  7.7928e-02

Tokens embeddings:
tensor([ 3.3173e-02, -5.4577e-02,  3.4653e-02,  4.3952e-01,  5.0742e-02,
         2.9773e-01,  2.5037e-02,  9.7216e-02,  4.4977e-01,  6.1428e-02,
         6.8972e-02, -2.0589e-02,  1.5666e-01, -2.1518e-03,  4.4977e-01,
        -1.0218e-02,  6.6124e-03,  1.0792e-01,  8.6283e-02,  8.6060e-01,
         7.5139e-02,  3.2138e-01,  7.7928e-02, -2.1756e-03,  7.7928e-02,
        -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,
         7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02,
        -2.1756e-03,  7.7928e-02, -1.0000e+09,  7.7928e-02,  7.7928e-02,
         7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02,
        -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,
         7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02,
        -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,
         7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02,
        -2.1756e-03,  7.7928e-02

Tokens embeddings:
tensor([ 3.3173e-02,  7.5144e-02,  4.9093e-02, -2.7676e-02,  3.6311e-02,
         1.7344e-01, -6.5562e-03, -1.0000e+09,  1.9529e-01,  1.4713e-01,
         2.4960e-02,  2.0548e-01,  2.3880e-01, -2.1518e-03,  6.0246e-01,
         8.6567e-02,  3.4891e-02, -2.7676e-02,  3.6311e-02,  4.3315e-02,
         3.6004e-02,  1.2986e-01,  9.4834e-02, -7.7107e-02,  6.7064e-02,
        -2.7676e-02,  3.6311e-02,  2.6615e-01,  1.5389e-01, -2.7676e-02,
         9.3427e-02, -1.0220e-01,  5.0742e-02,  9.4834e-02,  2.9773e-01,
        -1.0000e+09,  8.6060e-01,  6.7344e-02,  8.6283e-02,  8.6060e-01,
         3.2138e-01, -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02,
        -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,
         7.7928e-02, -1.0000e+09,  7.7928e-02,  7.7928e-02,  7.7928e-02,
        -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02, -1.0000e+09,
         7.7928e-02,  7.7928e-02,  7.7928e-02, -2.1756e-03,  7.7928e-02,
        -2.1756e-03,  7.7928e-02

Tokens embeddings:
tensor([ 3.3173e-02,  7.5144e-02,  4.9093e-02, -2.7676e-02,  3.6311e-02,
         1.7344e-01, -6.5562e-03, -1.0000e+09,  1.9529e-01,  1.4713e-01,
         2.4960e-02,  2.0548e-01,  2.3880e-01, -2.1518e-03,  6.0246e-01,
         8.6567e-02,  3.4891e-02, -2.7676e-02,  3.6311e-02,  4.3315e-02,
         3.6004e-02,  1.2986e-01,  9.4834e-02, -7.7107e-02,  6.7064e-02,
        -2.7676e-02,  3.6311e-02,  2.6615e-01,  1.5389e-01, -2.7676e-02,
         9.3427e-02, -1.0220e-01,  5.0742e-02,  9.4834e-02,  2.9773e-01,
        -1.0000e+09,  8.6060e-01,  6.7344e-02,  8.6283e-02,  8.6060e-01,
         3.2138e-01, -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02,
        -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,
         7.7928e-02, -1.0000e+09,  7.7928e-02,  7.7928e-02,  7.7928e-02,
        -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02, -1.0000e+09,
         7.7928e-02,  7.7928e-02,  7.7928e-02, -2.1756e-03,  7.7928e-02,
        -2.1756e-03,  7.7928e-02

Tokens embeddings:
tensor([ 3.3173e-02,  7.5144e-02,  4.9093e-02, -2.7676e-02,  3.6311e-02,
         1.7344e-01, -6.5562e-03, -1.0000e+09,  1.9529e-01,  1.4713e-01,
         2.4960e-02,  2.0548e-01,  2.3880e-01, -2.1518e-03,  6.0246e-01,
         8.6567e-02,  3.4891e-02, -2.7676e-02,  3.6311e-02,  4.3315e-02,
         3.6004e-02,  1.2986e-01,  9.4834e-02, -7.7107e-02,  6.7064e-02,
        -2.7676e-02,  3.6311e-02,  2.6615e-01,  1.5389e-01, -2.7676e-02,
         9.3427e-02, -1.0220e-01,  5.0742e-02,  9.4834e-02,  2.9773e-01,
        -1.0000e+09,  8.6060e-01,  6.7344e-02,  8.6283e-02,  8.6060e-01,
         3.2138e-01, -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02,
        -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,
         7.7928e-02, -1.0000e+09,  7.7928e-02,  7.7928e-02,  7.7928e-02,
        -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02, -1.0000e+09,
         7.7928e-02,  7.7928e-02,  7.7928e-02, -2.1756e-03,  7.7928e-02,
        -2.1756e-03,  7.7928e-02

Tokens embeddings:
tensor([ 3.3173e-02,  7.5144e-02,  4.9093e-02, -2.7676e-02,  3.6311e-02,
         1.7344e-01, -6.5562e-03, -1.0000e+09,  1.9529e-01,  1.4713e-01,
         2.4960e-02,  2.0548e-01,  2.3880e-01, -2.1518e-03,  6.0246e-01,
         8.6567e-02,  3.4891e-02, -2.7676e-02,  3.6311e-02,  4.3315e-02,
         3.6004e-02,  1.2986e-01,  9.4834e-02, -7.7107e-02,  6.7064e-02,
        -2.7676e-02,  3.6311e-02,  2.6615e-01,  1.5389e-01, -2.7676e-02,
         9.3427e-02, -1.0220e-01,  5.0742e-02,  9.4834e-02,  2.9773e-01,
        -1.0000e+09,  8.6060e-01,  6.7344e-02,  8.6283e-02,  8.6060e-01,
         3.2138e-01, -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02,
        -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,
         7.7928e-02, -1.0000e+09,  7.7928e-02,  7.7928e-02,  7.7928e-02,
        -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02, -1.0000e+09,
         7.7928e-02,  7.7928e-02,  7.7928e-02, -2.1756e-03,  7.7928e-02,
        -2.1756e-03,  7.7928e-02

Tokens embeddings:
tensor([ 3.3173e-02,  7.5144e-02,  4.9093e-02, -2.7676e-02,  3.6311e-02,
         1.7344e-01, -6.5562e-03, -1.0000e+09,  1.9529e-01,  1.4713e-01,
         2.4960e-02,  2.0548e-01,  2.3880e-01, -2.1518e-03,  6.0246e-01,
         8.6567e-02,  3.4891e-02, -2.7676e-02,  3.6311e-02,  4.3315e-02,
         3.6004e-02,  1.2986e-01,  9.4834e-02, -7.7107e-02,  6.7064e-02,
        -2.7676e-02,  3.6311e-02,  2.6615e-01,  1.5389e-01, -2.7676e-02,
         9.3427e-02, -1.0220e-01,  5.0742e-02,  9.4834e-02,  2.9773e-01,
        -1.0000e+09,  8.6060e-01,  6.7344e-02,  8.6283e-02,  8.6060e-01,
         3.2138e-01, -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02,
        -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,
         7.7928e-02, -1.0000e+09,  7.7928e-02,  7.7928e-02,  7.7928e-02,
        -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02, -1.0000e+09,
         7.7928e-02,  7.7928e-02,  7.7928e-02, -2.1756e-03,  7.7928e-02,
        -2.1756e-03,  7.7928e-02

Tokens embeddings:
tensor([ 3.3173e-02,  7.5144e-02,  4.9093e-02, -2.7676e-02,  3.6311e-02,
         1.7344e-01, -6.5562e-03, -1.0000e+09,  1.9529e-01,  1.4713e-01,
         2.4960e-02,  2.0548e-01,  2.3880e-01, -2.1518e-03,  6.0246e-01,
         8.6567e-02,  3.4891e-02, -2.7676e-02,  3.6311e-02,  4.3315e-02,
         3.6004e-02,  1.2986e-01,  9.4834e-02, -7.7107e-02,  6.7064e-02,
        -2.7676e-02,  3.6311e-02,  2.6615e-01,  1.5389e-01, -2.7676e-02,
         9.3427e-02, -1.0220e-01,  5.0742e-02,  9.4834e-02,  2.9773e-01,
        -1.0000e+09,  8.6060e-01,  6.7344e-02,  8.6283e-02,  8.6060e-01,
         3.2138e-01, -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02,
        -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,
         7.7928e-02, -1.0000e+09,  7.7928e-02,  7.7928e-02,  7.7928e-02,
        -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02, -1.0000e+09,
         7.7928e-02,  7.7928e-02,  7.7928e-02, -2.1756e-03,  7.7928e-02,
        -2.1756e-03,  7.7928e-02

Tokens embeddings:
tensor([ 3.3173e-02,  7.5144e-02,  4.9093e-02, -2.7676e-02,  3.6311e-02,
         1.7344e-01, -6.5562e-03, -1.0000e+09,  1.9529e-01,  1.4713e-01,
         2.4960e-02,  2.0548e-01,  2.3880e-01, -2.1518e-03,  6.0246e-01,
         8.6567e-02,  3.4891e-02, -2.7676e-02,  3.6311e-02,  4.3315e-02,
         3.6004e-02,  1.2986e-01,  9.4834e-02, -7.7107e-02,  6.7064e-02,
        -2.7676e-02,  3.6311e-02,  2.6615e-01,  1.5389e-01, -2.7676e-02,
         9.3427e-02, -1.0220e-01,  5.0742e-02,  9.4834e-02,  2.9773e-01,
        -1.0000e+09,  8.6060e-01,  6.7344e-02,  8.6283e-02,  8.6060e-01,
         3.2138e-01, -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02,
        -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,
         7.7928e-02, -1.0000e+09,  7.7928e-02,  7.7928e-02,  7.7928e-02,
        -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02, -1.0000e+09,
         7.7928e-02,  7.7928e-02,  7.7928e-02, -2.1756e-03,  7.7928e-02,
        -2.1756e-03,  7.7928e-02

Tokens embeddings:
tensor([ 3.3173e-02,  7.5144e-02,  4.9093e-02, -2.7676e-02,  3.6311e-02,
         1.7344e-01, -6.5562e-03, -1.0000e+09,  1.9529e-01,  1.4713e-01,
         2.4960e-02,  2.0548e-01,  2.3880e-01, -2.1518e-03,  6.0246e-01,
         8.6567e-02,  3.4891e-02, -2.7676e-02,  3.6311e-02,  4.3315e-02,
         3.6004e-02,  1.2986e-01,  9.4834e-02, -7.7107e-02,  6.7064e-02,
        -2.7676e-02,  3.6311e-02,  2.6615e-01,  1.5389e-01, -2.7676e-02,
         9.3427e-02, -1.0220e-01,  5.0742e-02,  9.4834e-02,  2.9773e-01,
        -1.0000e+09,  8.6060e-01,  6.7344e-02,  8.6283e-02,  8.6060e-01,
         3.2138e-01, -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02,
        -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,
         7.7928e-02, -1.0000e+09,  7.7928e-02,  7.7928e-02,  7.7928e-02,
        -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02, -1.0000e+09,
         7.7928e-02,  7.7928e-02,  7.7928e-02, -2.1756e-03,  7.7928e-02,
        -2.1756e-03,  7.7928e-02

Tokens embeddings:
tensor([ 3.3173e-02,  7.5144e-02,  4.9093e-02, -2.7676e-02,  3.6311e-02,
         1.7344e-01, -6.5562e-03, -1.0000e+09,  1.9529e-01,  1.4713e-01,
         2.4960e-02,  2.0548e-01,  2.3880e-01, -2.1518e-03,  6.0246e-01,
         8.6567e-02,  3.4891e-02, -2.7676e-02,  3.6311e-02,  4.3315e-02,
         3.6004e-02,  1.2986e-01,  9.4834e-02, -7.7107e-02,  6.7064e-02,
        -2.7676e-02,  3.6311e-02,  2.6615e-01,  1.5389e-01, -2.7676e-02,
         9.3427e-02, -1.0220e-01,  5.0742e-02,  9.4834e-02,  2.9773e-01,
        -1.0000e+09,  8.6060e-01,  6.7344e-02,  8.6283e-02,  8.6060e-01,
         3.2138e-01, -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02,
        -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,
         7.7928e-02, -1.0000e+09,  7.7928e-02,  7.7928e-02,  7.7928e-02,
        -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02, -1.0000e+09,
         7.7928e-02,  7.7928e-02,  7.7928e-02, -2.1756e-03,  7.7928e-02,
        -2.1756e-03,  7.7928e-02

Tokens embeddings:
tensor([ 3.3173e-02,  7.5144e-02,  4.9093e-02, -2.7676e-02,  3.6311e-02,
         1.7344e-01, -6.5562e-03, -1.0000e+09,  1.9529e-01,  1.4713e-01,
         2.4960e-02,  2.0548e-01,  2.3880e-01, -2.1518e-03,  6.0246e-01,
         8.6567e-02,  3.4891e-02, -2.7676e-02,  3.6311e-02,  4.3315e-02,
         3.6004e-02,  1.2986e-01,  9.4834e-02, -7.7107e-02,  6.7064e-02,
        -2.7676e-02,  3.6311e-02,  2.6615e-01,  1.5389e-01, -2.7676e-02,
         9.3427e-02, -1.0220e-01,  5.0742e-02,  9.4834e-02,  2.9773e-01,
        -1.0000e+09,  8.6060e-01,  6.7344e-02,  8.6283e-02,  8.6060e-01,
         3.2138e-01, -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02,
        -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,
         7.7928e-02, -1.0000e+09,  7.7928e-02,  7.7928e-02,  7.7928e-02,
        -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02, -1.0000e+09,
         7.7928e-02,  7.7928e-02,  7.7928e-02, -2.1756e-03,  7.7928e-02,
        -2.1756e-03,  7.7928e-02

Tokens embeddings:
tensor([ 3.3173e-02,  7.5144e-02,  4.9093e-02, -2.7676e-02,  3.6311e-02,
         1.7344e-01, -6.5562e-03, -1.0000e+09,  1.9529e-01,  1.4713e-01,
         2.4960e-02,  2.0548e-01,  2.3880e-01, -2.1518e-03,  6.0246e-01,
         8.6567e-02,  3.4891e-02, -2.7676e-02,  3.6311e-02,  4.3315e-02,
         3.6004e-02,  1.2986e-01,  9.4834e-02, -7.7107e-02,  6.7064e-02,
        -2.7676e-02,  3.6311e-02,  2.6615e-01,  1.5389e-01, -2.7676e-02,
         9.3427e-02, -1.0220e-01,  5.0742e-02,  9.4834e-02,  2.9773e-01,
        -1.0000e+09,  8.6060e-01,  6.7344e-02,  8.6283e-02,  8.6060e-01,
         3.2138e-01, -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02,
        -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,
         7.7928e-02, -1.0000e+09,  7.7928e-02,  7.7928e-02,  7.7928e-02,
        -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02, -1.0000e+09,
         7.7928e-02,  7.7928e-02,  7.7928e-02, -2.1756e-03,  7.7928e-02,
        -2.1756e-03,  7.7928e-02

Tokens embeddings:
tensor([ 3.3173e-02,  7.5144e-02,  4.9093e-02, -2.7676e-02,  3.6311e-02,
         1.7344e-01, -6.5562e-03, -1.0000e+09,  1.9529e-01,  1.4713e-01,
         2.4960e-02,  2.0548e-01,  2.3880e-01, -2.1518e-03,  6.0246e-01,
         8.6567e-02,  3.4891e-02, -2.7676e-02,  3.6311e-02,  4.3315e-02,
         3.6004e-02,  1.2986e-01,  9.4834e-02, -7.7107e-02,  6.7064e-02,
        -2.7676e-02,  3.6311e-02,  2.6615e-01,  1.5389e-01, -2.7676e-02,
         9.3427e-02, -1.0220e-01,  5.0742e-02,  9.4834e-02,  2.9773e-01,
        -1.0000e+09,  8.6060e-01,  6.7344e-02,  8.6283e-02,  8.6060e-01,
         3.2138e-01, -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02,
        -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,
         7.7928e-02, -1.0000e+09,  7.7928e-02,  7.7928e-02,  7.7928e-02,
        -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02, -1.0000e+09,
         7.7928e-02,  7.7928e-02,  7.7928e-02, -2.1756e-03,  7.7928e-02,
        -2.1756e-03,  7.7928e-02

Tokens embeddings:
tensor([ 3.3173e-02, -1.9409e-01,  6.8794e-02, -1.9156e-03,  7.4027e-02,
        -2.1465e-02,  8.6283e-02,  1.0056e-02,  9.7448e-02,  1.4232e-01,
         2.2607e-01,  4.4977e-01,  2.9773e-01, -1.0000e+09,  5.0742e-02,
         2.8717e-01,  1.3724e-01, -1.1636e-02,  8.2257e-02, -2.1518e-03,
         2.9220e-02, -4.5058e-03,  9.9946e-02,  1.5879e-01,  4.4268e-02,
        -2.1518e-03,  8.7602e-02, -2.1518e-03,  7.0379e-02, -2.0392e-02,
         1.2945e-01,  3.7736e-02,  4.2931e-02, -2.1518e-03,  1.1194e+00,
         6.5877e-02,  8.3316e-02, -2.0392e-02,  2.0452e-01, -2.1465e-02,
         2.9773e-01,  4.2112e-02,  4.4977e-01,  1.3724e-01,  4.3917e-02,
        -1.6013e-02,  5.0742e-02,  3.3030e-02,  8.0457e-01,  1.9686e-02,
         5.5098e-01,  2.3997e-01,  1.5847e-01, -1.3469e-02,  5.2886e-02,
         1.2945e-01,  7.9818e-02,  3.3441e-01,  8.6283e-02,  8.6060e-01,
         3.2138e-01, -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02,
        -2.1756e-03,  7.7928e-02

Tokens embeddings:
tensor([ 3.3173e-02, -1.9409e-01,  6.8794e-02, -1.9156e-03,  7.4027e-02,
        -2.1465e-02,  8.6283e-02,  1.0056e-02,  9.7448e-02,  1.4232e-01,
         2.2607e-01,  4.4977e-01,  2.9773e-01, -1.0000e+09,  5.0742e-02,
         2.8717e-01,  1.3724e-01, -1.1636e-02,  8.2257e-02, -2.1518e-03,
         2.9220e-02, -4.5058e-03,  9.9946e-02,  1.5879e-01,  4.4268e-02,
        -2.1518e-03,  8.7602e-02, -2.1518e-03,  7.0379e-02, -2.0392e-02,
         1.2945e-01,  3.7736e-02,  4.2931e-02, -2.1518e-03,  1.1194e+00,
         6.5877e-02,  8.3316e-02, -2.0392e-02,  2.0452e-01, -2.1465e-02,
         2.9773e-01,  4.2112e-02,  4.4977e-01,  1.3724e-01,  4.3917e-02,
        -1.6013e-02,  5.0742e-02,  3.3030e-02,  8.0457e-01,  1.9686e-02,
         5.5098e-01,  2.3997e-01,  1.5847e-01, -1.3469e-02,  5.2886e-02,
         1.2945e-01,  7.9818e-02,  3.3441e-01,  8.6283e-02,  8.6060e-01,
         3.2138e-01, -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02,
        -2.1756e-03,  7.7928e-02

Tokens embeddings:
tensor([ 3.3173e-02, -1.9409e-01,  6.8794e-02, -1.9156e-03,  7.4027e-02,
        -2.1465e-02,  8.6283e-02,  1.0056e-02,  9.7448e-02,  1.4232e-01,
         2.2607e-01,  4.4977e-01,  2.9773e-01, -1.0000e+09,  5.0742e-02,
         2.8717e-01,  1.3724e-01, -1.1636e-02,  8.2257e-02, -2.1518e-03,
         2.9220e-02, -4.5058e-03,  9.9946e-02,  1.5879e-01,  4.4268e-02,
        -2.1518e-03,  8.7602e-02, -2.1518e-03,  7.0379e-02, -2.0392e-02,
         1.2945e-01,  3.7736e-02,  4.2931e-02, -2.1518e-03,  1.1194e+00,
         6.5877e-02,  8.3316e-02, -2.0392e-02,  2.0452e-01, -2.1465e-02,
         2.9773e-01,  4.2112e-02,  4.4977e-01,  1.3724e-01,  4.3917e-02,
        -1.6013e-02,  5.0742e-02,  3.3030e-02,  8.0457e-01,  1.9686e-02,
         5.5098e-01,  2.3997e-01,  1.5847e-01, -1.3469e-02,  5.2886e-02,
         1.2945e-01,  7.9818e-02,  3.3441e-01,  8.6283e-02,  8.6060e-01,
         3.2138e-01, -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02,
        -2.1756e-03,  7.7928e-02

Tokens embeddings:
tensor([ 3.3173e-02, -1.9409e-01,  6.8794e-02, -1.9156e-03,  7.4027e-02,
        -2.1465e-02,  8.6283e-02,  1.0056e-02,  9.7448e-02,  1.4232e-01,
         2.2607e-01,  4.4977e-01,  2.9773e-01, -1.0000e+09,  5.0742e-02,
         2.8717e-01,  1.3724e-01, -1.1636e-02,  8.2257e-02, -2.1518e-03,
         2.9220e-02, -4.5058e-03,  9.9946e-02,  1.5879e-01,  4.4268e-02,
        -2.1518e-03,  8.7602e-02, -2.1518e-03,  7.0379e-02, -2.0392e-02,
         1.2945e-01,  3.7736e-02,  4.2931e-02, -2.1518e-03,  1.1194e+00,
         6.5877e-02,  8.3316e-02, -2.0392e-02,  2.0452e-01, -2.1465e-02,
         2.9773e-01,  4.2112e-02,  4.4977e-01,  1.3724e-01,  4.3917e-02,
        -1.6013e-02,  5.0742e-02,  3.3030e-02,  8.0457e-01,  1.9686e-02,
         5.5098e-01,  2.3997e-01,  1.5847e-01, -1.3469e-02,  5.2886e-02,
         1.2945e-01,  7.9818e-02,  3.3441e-01,  8.6283e-02,  8.6060e-01,
         3.2138e-01, -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02,
        -2.1756e-03,  7.7928e-02

Tokens embeddings:
tensor([ 3.3173e-02, -1.9409e-01,  6.8794e-02, -1.9156e-03,  7.4027e-02,
        -2.1465e-02,  8.6283e-02,  1.0056e-02,  9.7448e-02,  1.4232e-01,
         2.2607e-01,  4.4977e-01,  2.9773e-01, -1.0000e+09,  5.0742e-02,
         2.8717e-01,  1.3724e-01, -1.1636e-02,  8.2257e-02, -2.1518e-03,
         2.9220e-02, -4.5058e-03,  9.9946e-02,  1.5879e-01,  4.4268e-02,
        -2.1518e-03,  8.7602e-02, -2.1518e-03,  7.0379e-02, -2.0392e-02,
         1.2945e-01,  3.7736e-02,  4.2931e-02, -2.1518e-03,  1.1194e+00,
         6.5877e-02,  8.3316e-02, -2.0392e-02,  2.0452e-01, -2.1465e-02,
         2.9773e-01,  4.2112e-02,  4.4977e-01,  1.3724e-01,  4.3917e-02,
        -1.6013e-02,  5.0742e-02,  3.3030e-02,  8.0457e-01,  1.9686e-02,
         5.5098e-01,  2.3997e-01,  1.5847e-01, -1.3469e-02,  5.2886e-02,
         1.2945e-01,  7.9818e-02,  3.3441e-01,  8.6283e-02,  8.6060e-01,
         3.2138e-01, -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02,
        -2.1756e-03,  7.7928e-02

Tokens embeddings:
tensor([ 3.3173e-02, -1.9409e-01,  6.8794e-02, -1.9156e-03,  7.4027e-02,
        -2.1465e-02,  8.6283e-02,  1.0056e-02,  9.7448e-02,  1.4232e-01,
         2.2607e-01,  4.4977e-01,  2.9773e-01, -1.0000e+09,  5.0742e-02,
         2.8717e-01,  1.3724e-01, -1.1636e-02,  8.2257e-02, -2.1518e-03,
         2.9220e-02, -4.5058e-03,  9.9946e-02,  1.5879e-01,  4.4268e-02,
        -2.1518e-03,  8.7602e-02, -2.1518e-03,  7.0379e-02, -2.0392e-02,
         1.2945e-01,  3.7736e-02,  4.2931e-02, -2.1518e-03,  1.1194e+00,
         6.5877e-02,  8.3316e-02, -2.0392e-02,  2.0452e-01, -2.1465e-02,
         2.9773e-01,  4.2112e-02,  4.4977e-01,  1.3724e-01,  4.3917e-02,
        -1.6013e-02,  5.0742e-02,  3.3030e-02,  8.0457e-01,  1.9686e-02,
         5.5098e-01,  2.3997e-01,  1.5847e-01, -1.3469e-02,  5.2886e-02,
         1.2945e-01,  7.9818e-02,  3.3441e-01,  8.6283e-02,  8.6060e-01,
         3.2138e-01, -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02,
        -2.1756e-03,  7.7928e-02

Tokens embeddings:
tensor([ 3.3173e-02, -1.9409e-01,  6.8794e-02, -1.9156e-03,  7.4027e-02,
        -2.1465e-02,  8.6283e-02,  1.0056e-02,  9.7448e-02,  1.4232e-01,
         2.2607e-01,  4.4977e-01,  2.9773e-01, -1.0000e+09,  5.0742e-02,
         2.8717e-01,  1.3724e-01, -1.1636e-02,  8.2257e-02, -2.1518e-03,
         2.9220e-02, -4.5058e-03,  9.9946e-02,  1.5879e-01,  4.4268e-02,
        -2.1518e-03,  8.7602e-02, -2.1518e-03,  7.0379e-02, -2.0392e-02,
         1.2945e-01,  3.7736e-02,  4.2931e-02, -2.1518e-03,  1.1194e+00,
         6.5877e-02,  8.3316e-02, -2.0392e-02,  2.0452e-01, -2.1465e-02,
         2.9773e-01,  4.2112e-02,  4.4977e-01,  1.3724e-01,  4.3917e-02,
        -1.6013e-02,  5.0742e-02,  3.3030e-02,  8.0457e-01,  1.9686e-02,
         5.5098e-01,  2.3997e-01,  1.5847e-01, -1.3469e-02,  5.2886e-02,
         1.2945e-01,  7.9818e-02,  3.3441e-01,  8.6283e-02,  8.6060e-01,
         3.2138e-01, -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02,
        -2.1756e-03,  7.7928e-02

Tokens embeddings:
tensor([ 3.3173e-02, -1.9409e-01,  6.8794e-02, -1.9156e-03,  7.4027e-02,
        -2.1465e-02,  8.6283e-02,  1.0056e-02,  9.7448e-02,  1.4232e-01,
         2.2607e-01,  4.4977e-01,  2.9773e-01, -1.0000e+09,  5.0742e-02,
         2.8717e-01,  1.3724e-01, -1.1636e-02,  8.2257e-02, -2.1518e-03,
         2.9220e-02, -4.5058e-03,  9.9946e-02,  1.5879e-01,  4.4268e-02,
        -2.1518e-03,  8.7602e-02, -2.1518e-03,  7.0379e-02, -2.0392e-02,
         1.2945e-01,  3.7736e-02,  4.2931e-02, -2.1518e-03,  1.1194e+00,
         6.5877e-02,  8.3316e-02, -2.0392e-02,  2.0452e-01, -2.1465e-02,
         2.9773e-01,  4.2112e-02,  4.4977e-01,  1.3724e-01,  4.3917e-02,
        -1.6013e-02,  5.0742e-02,  3.3030e-02,  8.0457e-01,  1.9686e-02,
         5.5098e-01,  2.3997e-01,  1.5847e-01, -1.3469e-02,  5.2886e-02,
         1.2945e-01,  7.9818e-02,  3.3441e-01,  8.6283e-02,  8.6060e-01,
         3.2138e-01, -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02,
        -2.1756e-03,  7.7928e-02

Tokens embeddings:
tensor([ 3.3173e-02, -1.9409e-01,  6.8794e-02, -1.9156e-03,  7.4027e-02,
        -2.1465e-02,  8.6283e-02,  1.0056e-02,  9.7448e-02,  1.4232e-01,
         2.2607e-01,  4.4977e-01,  2.9773e-01, -1.0000e+09,  5.0742e-02,
         2.8717e-01,  1.3724e-01, -1.1636e-02,  8.2257e-02, -2.1518e-03,
         2.9220e-02, -4.5058e-03,  9.9946e-02,  1.5879e-01,  4.4268e-02,
        -2.1518e-03,  8.7602e-02, -2.1518e-03,  7.0379e-02, -2.0392e-02,
         1.2945e-01,  3.7736e-02,  4.2931e-02, -2.1518e-03,  1.1194e+00,
         6.5877e-02,  8.3316e-02, -2.0392e-02,  2.0452e-01, -2.1465e-02,
         2.9773e-01,  4.2112e-02,  4.4977e-01,  1.3724e-01,  4.3917e-02,
        -1.6013e-02,  5.0742e-02,  3.3030e-02,  8.0457e-01,  1.9686e-02,
         5.5098e-01,  2.3997e-01,  1.5847e-01, -1.3469e-02,  5.2886e-02,
         1.2945e-01,  7.9818e-02,  3.3441e-01,  8.6283e-02,  8.6060e-01,
         3.2138e-01, -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02,
        -2.1756e-03,  7.7928e-02

Tokens embeddings:
tensor([ 3.3173e-02, -1.9409e-01,  6.8794e-02, -1.9156e-03,  7.4027e-02,
        -2.1465e-02,  8.6283e-02,  1.0056e-02,  9.7448e-02,  1.4232e-01,
         2.2607e-01,  4.4977e-01,  2.9773e-01, -1.0000e+09,  5.0742e-02,
         2.8717e-01,  1.3724e-01, -1.1636e-02,  8.2257e-02, -2.1518e-03,
         2.9220e-02, -4.5058e-03,  9.9946e-02,  1.5879e-01,  4.4268e-02,
        -2.1518e-03,  8.7602e-02, -2.1518e-03,  7.0379e-02, -2.0392e-02,
         1.2945e-01,  3.7736e-02,  4.2931e-02, -2.1518e-03,  1.1194e+00,
         6.5877e-02,  8.3316e-02, -2.0392e-02,  2.0452e-01, -2.1465e-02,
         2.9773e-01,  4.2112e-02,  4.4977e-01,  1.3724e-01,  4.3917e-02,
        -1.6013e-02,  5.0742e-02,  3.3030e-02,  8.0457e-01,  1.9686e-02,
         5.5098e-01,  2.3997e-01,  1.5847e-01, -1.3469e-02,  5.2886e-02,
         1.2945e-01,  7.9818e-02,  3.3441e-01,  8.6283e-02,  8.6060e-01,
         3.2138e-01, -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02,
        -2.1756e-03,  7.7928e-02

Tokens embeddings:
tensor([ 3.3173e-02, -1.9409e-01,  6.8794e-02, -1.9156e-03,  7.4027e-02,
        -2.1465e-02,  8.6283e-02,  1.0056e-02,  9.7448e-02,  1.4232e-01,
         2.2607e-01,  4.4977e-01,  2.9773e-01, -1.0000e+09,  5.0742e-02,
         2.8717e-01,  1.3724e-01, -1.1636e-02,  8.2257e-02, -2.1518e-03,
         2.9220e-02, -4.5058e-03,  9.9946e-02,  1.5879e-01,  4.4268e-02,
        -2.1518e-03,  8.7602e-02, -2.1518e-03,  7.0379e-02, -2.0392e-02,
         1.2945e-01,  3.7736e-02,  4.2931e-02, -2.1518e-03,  1.1194e+00,
         6.5877e-02,  8.3316e-02, -2.0392e-02,  2.0452e-01, -2.1465e-02,
         2.9773e-01,  4.2112e-02,  4.4977e-01,  1.3724e-01,  4.3917e-02,
        -1.6013e-02,  5.0742e-02,  3.3030e-02,  8.0457e-01,  1.9686e-02,
         5.5098e-01,  2.3997e-01,  1.5847e-01, -1.3469e-02,  5.2886e-02,
         1.2945e-01,  7.9818e-02,  3.3441e-01,  8.6283e-02,  8.6060e-01,
         3.2138e-01, -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02,
        -2.1756e-03,  7.7928e-02

Tokens embeddings:
tensor([ 3.3173e-02, -1.9409e-01,  6.8794e-02, -1.9156e-03,  7.4027e-02,
        -2.1465e-02,  8.6283e-02,  1.0056e-02,  9.7448e-02,  1.4232e-01,
         2.2607e-01,  4.4977e-01,  2.9773e-01, -1.0000e+09,  5.0742e-02,
         2.8717e-01,  1.3724e-01, -1.1636e-02,  8.2257e-02, -2.1518e-03,
         2.9220e-02, -4.5058e-03,  9.9946e-02,  1.5879e-01,  4.4268e-02,
        -2.1518e-03,  8.7602e-02, -2.1518e-03,  7.0379e-02, -2.0392e-02,
         1.2945e-01,  3.7736e-02,  4.2931e-02, -2.1518e-03,  1.1194e+00,
         6.5877e-02,  8.3316e-02, -2.0392e-02,  2.0452e-01, -2.1465e-02,
         2.9773e-01,  4.2112e-02,  4.4977e-01,  1.3724e-01,  4.3917e-02,
        -1.6013e-02,  5.0742e-02,  3.3030e-02,  8.0457e-01,  1.9686e-02,
         5.5098e-01,  2.3997e-01,  1.5847e-01, -1.3469e-02,  5.2886e-02,
         1.2945e-01,  7.9818e-02,  3.3441e-01,  8.6283e-02,  8.6060e-01,
         3.2138e-01, -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02,
        -2.1756e-03,  7.7928e-02

Tokens embeddings:
tensor([ 3.3173e-02, -2.2344e-02,  7.2689e-02, -7.6584e-03,  1.5909e-01,
        -2.0249e-02,  5.0742e-02,  4.9408e-02,  2.4864e-02,  3.2737e-02,
         1.2260e-01, -1.9221e-02,  8.3847e-02,  7.7454e-02,  8.6283e-02,
         1.0056e-02,  1.1836e-01, -1.0621e-01,  5.0742e-02, -1.8986e-02,
         8.6060e-01,  1.9529e-01,  2.0504e-01, -2.0392e-02,  3.1240e-02,
        -1.3911e-01,  8.3312e-02,  1.2260e-01,  2.2313e-01, -1.0000e+09,
        -6.5562e-03,  2.5951e-03,  3.3796e-02, -2.1518e-03,  4.9491e-02,
        -2.7676e-02,  8.6060e-01,  3.2138e-01,  7.7928e-02, -2.1756e-03,
         7.7928e-02,  7.7928e-02,  7.7928e-02, -2.1756e-03,  7.7928e-02,
        -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,
         7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02,
        -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02,  7.7928e-02,
         7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02,
        -2.1756e-03,  7.7928e-02

Tokens embeddings:
tensor([ 3.3173e-02, -2.2344e-02,  7.2689e-02, -7.6584e-03,  1.5909e-01,
        -2.0249e-02,  5.0742e-02,  4.9408e-02,  2.4864e-02,  3.2737e-02,
         1.2260e-01, -1.9221e-02,  8.3847e-02,  7.7454e-02,  8.6283e-02,
         1.0056e-02,  1.1836e-01, -1.0621e-01,  5.0742e-02, -1.8986e-02,
         8.6060e-01,  1.9529e-01,  2.0504e-01, -2.0392e-02,  3.1240e-02,
        -1.3911e-01,  8.3312e-02,  1.2260e-01,  2.2313e-01, -1.0000e+09,
        -6.5562e-03,  2.5951e-03,  3.3796e-02, -2.1518e-03,  4.9491e-02,
        -2.7676e-02,  8.6060e-01,  3.2138e-01,  7.7928e-02, -2.1756e-03,
         7.7928e-02,  7.7928e-02,  7.7928e-02, -2.1756e-03,  7.7928e-02,
        -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,
         7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02,
        -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02,  7.7928e-02,
         7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02,
        -2.1756e-03,  7.7928e-02

Tokens embeddings:
tensor([ 3.3173e-02, -2.2344e-02,  7.2689e-02, -7.6584e-03,  1.5909e-01,
        -2.0249e-02,  5.0742e-02,  4.9408e-02,  2.4864e-02,  3.2737e-02,
         1.2260e-01, -1.9221e-02,  8.3847e-02,  7.7454e-02,  8.6283e-02,
         1.0056e-02,  1.1836e-01, -1.0621e-01,  5.0742e-02, -1.8986e-02,
         8.6060e-01,  1.9529e-01,  2.0504e-01, -2.0392e-02,  3.1240e-02,
        -1.3911e-01,  8.3312e-02,  1.2260e-01,  2.2313e-01, -1.0000e+09,
        -6.5562e-03,  2.5951e-03,  3.3796e-02, -2.1518e-03,  4.9491e-02,
        -2.7676e-02,  8.6060e-01,  3.2138e-01,  7.7928e-02, -2.1756e-03,
         7.7928e-02,  7.7928e-02,  7.7928e-02, -2.1756e-03,  7.7928e-02,
        -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,
         7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02,
        -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02,  7.7928e-02,
         7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02,
        -2.1756e-03,  7.7928e-02

Tokens embeddings:
tensor([ 3.3173e-02, -2.2344e-02,  7.2689e-02, -7.6584e-03,  1.5909e-01,
        -2.0249e-02,  5.0742e-02,  4.9408e-02,  2.4864e-02,  3.2737e-02,
         1.2260e-01, -1.9221e-02,  8.3847e-02,  7.7454e-02,  8.6283e-02,
         1.0056e-02,  1.1836e-01, -1.0621e-01,  5.0742e-02, -1.8986e-02,
         8.6060e-01,  1.9529e-01,  2.0504e-01, -2.0392e-02,  3.1240e-02,
        -1.3911e-01,  8.3312e-02,  1.2260e-01,  2.2313e-01, -1.0000e+09,
        -6.5562e-03,  2.5951e-03,  3.3796e-02, -2.1518e-03,  4.9491e-02,
        -2.7676e-02,  8.6060e-01,  3.2138e-01,  7.7928e-02, -2.1756e-03,
         7.7928e-02,  7.7928e-02,  7.7928e-02, -2.1756e-03,  7.7928e-02,
        -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,
         7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02,
        -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02,  7.7928e-02,
         7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02,
        -2.1756e-03,  7.7928e-02

Tokens embeddings:
tensor([ 3.3173e-02, -2.2344e-02,  7.2689e-02, -7.6584e-03,  1.5909e-01,
        -2.0249e-02,  5.0742e-02,  4.9408e-02,  2.4864e-02,  3.2737e-02,
         1.2260e-01, -1.9221e-02,  8.3847e-02,  7.7454e-02,  8.6283e-02,
         1.0056e-02,  1.1836e-01, -1.0621e-01,  5.0742e-02, -1.8986e-02,
         8.6060e-01,  1.9529e-01,  2.0504e-01, -2.0392e-02,  3.1240e-02,
        -1.3911e-01,  8.3312e-02,  1.2260e-01,  2.2313e-01, -1.0000e+09,
        -6.5562e-03,  2.5951e-03,  3.3796e-02, -2.1518e-03,  4.9491e-02,
        -2.7676e-02,  8.6060e-01,  3.2138e-01,  7.7928e-02, -2.1756e-03,
         7.7928e-02,  7.7928e-02,  7.7928e-02, -2.1756e-03,  7.7928e-02,
        -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,
         7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02,
        -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02,  7.7928e-02,
         7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02,
        -2.1756e-03,  7.7928e-02

Tokens embeddings:
tensor([ 3.3173e-02, -2.2344e-02,  7.2689e-02, -7.6584e-03,  1.5909e-01,
        -2.0249e-02,  5.0742e-02,  4.9408e-02,  2.4864e-02,  3.2737e-02,
         1.2260e-01, -1.9221e-02,  8.3847e-02,  7.7454e-02,  8.6283e-02,
         1.0056e-02,  1.1836e-01, -1.0621e-01,  5.0742e-02, -1.8986e-02,
         8.6060e-01,  1.9529e-01,  2.0504e-01, -2.0392e-02,  3.1240e-02,
        -1.3911e-01,  8.3312e-02,  1.2260e-01,  2.2313e-01, -1.0000e+09,
        -6.5562e-03,  2.5951e-03,  3.3796e-02, -2.1518e-03,  4.9491e-02,
        -2.7676e-02,  8.6060e-01,  3.2138e-01,  7.7928e-02, -2.1756e-03,
         7.7928e-02,  7.7928e-02,  7.7928e-02, -2.1756e-03,  7.7928e-02,
        -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,
         7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02,
        -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02,  7.7928e-02,
         7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02,
        -2.1756e-03,  7.7928e-02

Tokens embeddings:
tensor([ 3.3173e-02, -2.2344e-02,  7.2689e-02, -7.6584e-03,  1.5909e-01,
        -2.0249e-02,  5.0742e-02,  4.9408e-02,  2.4864e-02,  3.2737e-02,
         1.2260e-01, -1.9221e-02,  8.3847e-02,  7.7454e-02,  8.6283e-02,
         1.0056e-02,  1.1836e-01, -1.0621e-01,  5.0742e-02, -1.8986e-02,
         8.6060e-01,  1.9529e-01,  2.0504e-01, -2.0392e-02,  3.1240e-02,
        -1.3911e-01,  8.3312e-02,  1.2260e-01,  2.2313e-01, -1.0000e+09,
        -6.5562e-03,  2.5951e-03,  3.3796e-02, -2.1518e-03,  4.9491e-02,
        -2.7676e-02,  8.6060e-01,  3.2138e-01,  7.7928e-02, -2.1756e-03,
         7.7928e-02,  7.7928e-02,  7.7928e-02, -2.1756e-03,  7.7928e-02,
        -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,
         7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02,
        -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02,  7.7928e-02,
         7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02,
        -2.1756e-03,  7.7928e-02

Tokens embeddings:
tensor([ 3.3173e-02, -2.2344e-02,  7.2689e-02, -7.6584e-03,  1.5909e-01,
        -2.0249e-02,  5.0742e-02,  4.9408e-02,  2.4864e-02,  3.2737e-02,
         1.2260e-01, -1.9221e-02,  8.3847e-02,  7.7454e-02,  8.6283e-02,
         1.0056e-02,  1.1836e-01, -1.0621e-01,  5.0742e-02, -1.8986e-02,
         8.6060e-01,  1.9529e-01,  2.0504e-01, -2.0392e-02,  3.1240e-02,
        -1.3911e-01,  8.3312e-02,  1.2260e-01,  2.2313e-01, -1.0000e+09,
        -6.5562e-03,  2.5951e-03,  3.3796e-02, -2.1518e-03,  4.9491e-02,
        -2.7676e-02,  8.6060e-01,  3.2138e-01,  7.7928e-02, -2.1756e-03,
         7.7928e-02,  7.7928e-02,  7.7928e-02, -2.1756e-03,  7.7928e-02,
        -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,
         7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02,
        -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02,  7.7928e-02,
         7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02,
        -2.1756e-03,  7.7928e-02

Tokens embeddings:
tensor([ 3.3173e-02, -2.2344e-02,  7.2689e-02, -7.6584e-03,  1.5909e-01,
        -2.0249e-02,  5.0742e-02,  4.9408e-02,  2.4864e-02,  3.2737e-02,
         1.2260e-01, -1.9221e-02,  8.3847e-02,  7.7454e-02,  8.6283e-02,
         1.0056e-02,  1.1836e-01, -1.0621e-01,  5.0742e-02, -1.8986e-02,
         8.6060e-01,  1.9529e-01,  2.0504e-01, -2.0392e-02,  3.1240e-02,
        -1.3911e-01,  8.3312e-02,  1.2260e-01,  2.2313e-01, -1.0000e+09,
        -6.5562e-03,  2.5951e-03,  3.3796e-02, -2.1518e-03,  4.9491e-02,
        -2.7676e-02,  8.6060e-01,  3.2138e-01,  7.7928e-02, -2.1756e-03,
         7.7928e-02,  7.7928e-02,  7.7928e-02, -2.1756e-03,  7.7928e-02,
        -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,
         7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02,
        -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02,  7.7928e-02,
         7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02,
        -2.1756e-03,  7.7928e-02

Tokens embeddings:
tensor([ 3.3173e-02, -2.2344e-02,  7.2689e-02, -7.6584e-03,  1.5909e-01,
        -2.0249e-02,  5.0742e-02,  4.9408e-02,  2.4864e-02,  3.2737e-02,
         1.2260e-01, -1.9221e-02,  8.3847e-02,  7.7454e-02,  8.6283e-02,
         1.0056e-02,  1.1836e-01, -1.0621e-01,  5.0742e-02, -1.8986e-02,
         8.6060e-01,  1.9529e-01,  2.0504e-01, -2.0392e-02,  3.1240e-02,
        -1.3911e-01,  8.3312e-02,  1.2260e-01,  2.2313e-01, -1.0000e+09,
        -6.5562e-03,  2.5951e-03,  3.3796e-02, -2.1518e-03,  4.9491e-02,
        -2.7676e-02,  8.6060e-01,  3.2138e-01,  7.7928e-02, -2.1756e-03,
         7.7928e-02,  7.7928e-02,  7.7928e-02, -2.1756e-03,  7.7928e-02,
        -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,
         7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02,
        -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02,  7.7928e-02,
         7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02,
        -2.1756e-03,  7.7928e-02

Tokens embeddings:
tensor([ 3.3173e-02, -2.2344e-02,  7.2689e-02, -7.6584e-03,  1.5909e-01,
        -2.0249e-02,  5.0742e-02,  4.9408e-02,  2.4864e-02,  3.2737e-02,
         1.2260e-01, -1.9221e-02,  8.3847e-02,  7.7454e-02,  8.6283e-02,
         1.0056e-02,  1.1836e-01, -1.0621e-01,  5.0742e-02, -1.8986e-02,
         8.6060e-01,  1.9529e-01,  2.0504e-01, -2.0392e-02,  3.1240e-02,
        -1.3911e-01,  8.3312e-02,  1.2260e-01,  2.2313e-01, -1.0000e+09,
        -6.5562e-03,  2.5951e-03,  3.3796e-02, -2.1518e-03,  4.9491e-02,
        -2.7676e-02,  8.6060e-01,  3.2138e-01,  7.7928e-02, -2.1756e-03,
         7.7928e-02,  7.7928e-02,  7.7928e-02, -2.1756e-03,  7.7928e-02,
        -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,
         7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02,
        -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02,  7.7928e-02,
         7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02,
        -2.1756e-03,  7.7928e-02

Tokens embeddings:
tensor([ 3.3173e-02, -2.2344e-02,  7.2689e-02, -7.6584e-03,  1.5909e-01,
        -2.0249e-02,  5.0742e-02,  4.9408e-02,  2.4864e-02,  3.2737e-02,
         1.2260e-01, -1.9221e-02,  8.3847e-02,  7.7454e-02,  8.6283e-02,
         1.0056e-02,  1.1836e-01, -1.0621e-01,  5.0742e-02, -1.8986e-02,
         8.6060e-01,  1.9529e-01,  2.0504e-01, -2.0392e-02,  3.1240e-02,
        -1.3911e-01,  8.3312e-02,  1.2260e-01,  2.2313e-01, -1.0000e+09,
        -6.5562e-03,  2.5951e-03,  3.3796e-02, -2.1518e-03,  4.9491e-02,
        -2.7676e-02,  8.6060e-01,  3.2138e-01,  7.7928e-02, -2.1756e-03,
         7.7928e-02,  7.7928e-02,  7.7928e-02, -2.1756e-03,  7.7928e-02,
        -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,
         7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02,
        -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02,  7.7928e-02,
         7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02,
        -2.1756e-03,  7.7928e-02

Tokens embeddings:
tensor([ 3.3173e-02,  1.7152e-02,  8.6060e-01, -1.8571e-02,  2.1326e-01,
        -3.1070e-02,  1.5625e-01, -2.2231e-02,  1.7420e-01, -7.6728e-02,
         8.0831e-02,  9.4834e-02,  6.3272e-02,  5.3537e-01,  8.9564e-02,
        -1.7184e-02,  4.3060e-02,  1.5625e-01,  6.8436e-02, -1.5793e-02,
         4.2367e-01,  1.4391e-02,  9.5352e-02,  6.0246e-01,  6.8436e-02,
         1.7420e-01,  2.8717e-01,  2.2010e-02,  1.1194e+00,  3.1240e-02,
         4.6943e-02,  1.0792e-01,  8.6283e-02,  8.6060e-01,  3.2138e-01,
        -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,
         7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02,
        -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,
         7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02,
        -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,
         7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02,
        -1.0000e+09,  7.7928e-02

Tokens embeddings:
tensor([ 3.3173e-02,  1.7152e-02,  8.6060e-01, -1.8571e-02,  2.1326e-01,
        -3.1070e-02,  1.5625e-01, -2.2231e-02,  1.7420e-01, -7.6728e-02,
         8.0831e-02,  9.4834e-02,  6.3272e-02,  5.3537e-01,  8.9564e-02,
        -1.7184e-02,  4.3060e-02,  1.5625e-01,  6.8436e-02, -1.5793e-02,
         4.2367e-01,  1.4391e-02,  9.5352e-02,  6.0246e-01,  6.8436e-02,
         1.7420e-01,  2.8717e-01,  2.2010e-02,  1.1194e+00,  3.1240e-02,
         4.6943e-02,  1.0792e-01,  8.6283e-02,  8.6060e-01,  3.2138e-01,
        -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,
         7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02,
        -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,
         7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02,
        -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,
         7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02,
        -1.0000e+09,  7.7928e-02

Tokens embeddings:
tensor([ 3.3173e-02,  1.7152e-02,  8.6060e-01, -1.8571e-02,  2.1326e-01,
        -3.1070e-02,  1.5625e-01, -2.2231e-02,  1.7420e-01, -7.6728e-02,
         8.0831e-02,  9.4834e-02,  6.3272e-02,  5.3537e-01,  8.9564e-02,
        -1.7184e-02,  4.3060e-02,  1.5625e-01,  6.8436e-02, -1.5793e-02,
         4.2367e-01,  1.4391e-02,  9.5352e-02,  6.0246e-01,  6.8436e-02,
         1.7420e-01,  2.8717e-01,  2.2010e-02,  1.1194e+00,  3.1240e-02,
         4.6943e-02,  1.0792e-01,  8.6283e-02,  8.6060e-01,  3.2138e-01,
        -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,
         7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02,
        -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,
         7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02,
        -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,
         7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02,
        -1.0000e+09,  7.7928e-02

Tokens embeddings:
tensor([ 3.3173e-02,  1.7152e-02,  8.6060e-01, -1.8571e-02,  2.1326e-01,
        -3.1070e-02,  1.5625e-01, -2.2231e-02,  1.7420e-01, -7.6728e-02,
         8.0831e-02,  9.4834e-02,  6.3272e-02,  5.3537e-01,  8.9564e-02,
        -1.7184e-02,  4.3060e-02,  1.5625e-01,  6.8436e-02, -1.5793e-02,
         4.2367e-01,  1.4391e-02,  9.5352e-02,  6.0246e-01,  6.8436e-02,
         1.7420e-01,  2.8717e-01,  2.2010e-02,  1.1194e+00,  3.1240e-02,
         4.6943e-02,  1.0792e-01,  8.6283e-02,  8.6060e-01,  3.2138e-01,
        -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,
         7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02,
        -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,
         7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02,
        -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,
         7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02,
        -1.0000e+09,  7.7928e-02

Tokens embeddings:
tensor([ 3.3173e-02,  1.7152e-02,  8.6060e-01, -1.8571e-02,  2.1326e-01,
        -3.1070e-02,  1.5625e-01, -2.2231e-02,  1.7420e-01, -7.6728e-02,
         8.0831e-02,  9.4834e-02,  6.3272e-02,  5.3537e-01,  8.9564e-02,
        -1.7184e-02,  4.3060e-02,  1.5625e-01,  6.8436e-02, -1.5793e-02,
         4.2367e-01,  1.4391e-02,  9.5352e-02,  6.0246e-01,  6.8436e-02,
         1.7420e-01,  2.8717e-01,  2.2010e-02,  1.1194e+00,  3.1240e-02,
         4.6943e-02,  1.0792e-01,  8.6283e-02,  8.6060e-01,  3.2138e-01,
        -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,
         7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02,
        -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,
         7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02,
        -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,
         7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02,
        -1.0000e+09,  7.7928e-02

Tokens embeddings:
tensor([ 3.3173e-02,  1.7152e-02,  8.6060e-01, -1.8571e-02,  2.1326e-01,
        -3.1070e-02,  1.5625e-01, -2.2231e-02,  1.7420e-01, -7.6728e-02,
         8.0831e-02,  9.4834e-02,  6.3272e-02,  5.3537e-01,  8.9564e-02,
        -1.7184e-02,  4.3060e-02,  1.5625e-01,  6.8436e-02, -1.5793e-02,
         4.2367e-01,  1.4391e-02,  9.5352e-02,  6.0246e-01,  6.8436e-02,
         1.7420e-01,  2.8717e-01,  2.2010e-02,  1.1194e+00,  3.1240e-02,
         4.6943e-02,  1.0792e-01,  8.6283e-02,  8.6060e-01,  3.2138e-01,
        -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,
         7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02,
        -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,
         7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02,
        -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,
         7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02,
        -1.0000e+09,  7.7928e-02

Tokens embeddings:
tensor([ 3.3173e-02,  1.7152e-02,  8.6060e-01, -1.8571e-02,  2.1326e-01,
        -3.1070e-02,  1.5625e-01, -2.2231e-02,  1.7420e-01, -7.6728e-02,
         8.0831e-02,  9.4834e-02,  6.3272e-02,  5.3537e-01,  8.9564e-02,
        -1.7184e-02,  4.3060e-02,  1.5625e-01,  6.8436e-02, -1.5793e-02,
         4.2367e-01,  1.4391e-02,  9.5352e-02,  6.0246e-01,  6.8436e-02,
         1.7420e-01,  2.8717e-01,  2.2010e-02,  1.1194e+00,  3.1240e-02,
         4.6943e-02,  1.0792e-01,  8.6283e-02,  8.6060e-01,  3.2138e-01,
        -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,
         7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02,
        -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,
         7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02,
        -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,
         7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02,
        -1.0000e+09,  7.7928e-02

Tokens embeddings:
tensor([ 3.3173e-02,  1.7152e-02,  8.6060e-01, -1.8571e-02,  2.1326e-01,
        -3.1070e-02,  1.5625e-01, -2.2231e-02,  1.7420e-01, -7.6728e-02,
         8.0831e-02,  9.4834e-02,  6.3272e-02,  5.3537e-01,  8.9564e-02,
        -1.7184e-02,  4.3060e-02,  1.5625e-01,  6.8436e-02, -1.5793e-02,
         4.2367e-01,  1.4391e-02,  9.5352e-02,  6.0246e-01,  6.8436e-02,
         1.7420e-01,  2.8717e-01,  2.2010e-02,  1.1194e+00,  3.1240e-02,
         4.6943e-02,  1.0792e-01,  8.6283e-02,  8.6060e-01,  3.2138e-01,
        -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,
         7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02,
        -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,
         7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02,
        -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,
         7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02,
        -1.0000e+09,  7.7928e-02

Tokens embeddings:
tensor([ 3.3173e-02,  1.7152e-02,  8.6060e-01, -1.8571e-02,  2.1326e-01,
        -3.1070e-02,  1.5625e-01, -2.2231e-02,  1.7420e-01, -7.6728e-02,
         8.0831e-02,  9.4834e-02,  6.3272e-02,  5.3537e-01,  8.9564e-02,
        -1.7184e-02,  4.3060e-02,  1.5625e-01,  6.8436e-02, -1.5793e-02,
         4.2367e-01,  1.4391e-02,  9.5352e-02,  6.0246e-01,  6.8436e-02,
         1.7420e-01,  2.8717e-01,  2.2010e-02,  1.1194e+00,  3.1240e-02,
         4.6943e-02,  1.0792e-01,  8.6283e-02,  8.6060e-01,  3.2138e-01,
        -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,
         7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02,
        -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,
         7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02,
        -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,
         7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02,
        -1.0000e+09,  7.7928e-02

Tokens embeddings:
tensor([ 3.3173e-02,  1.7152e-02,  8.6060e-01, -1.8571e-02,  2.1326e-01,
        -3.1070e-02,  1.5625e-01, -2.2231e-02,  1.7420e-01, -7.6728e-02,
         8.0831e-02,  9.4834e-02,  6.3272e-02,  5.3537e-01,  8.9564e-02,
        -1.7184e-02,  4.3060e-02,  1.5625e-01,  6.8436e-02, -1.5793e-02,
         4.2367e-01,  1.4391e-02,  9.5352e-02,  6.0246e-01,  6.8436e-02,
         1.7420e-01,  2.8717e-01,  2.2010e-02,  1.1194e+00,  3.1240e-02,
         4.6943e-02,  1.0792e-01,  8.6283e-02,  8.6060e-01,  3.2138e-01,
        -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,
         7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02,
        -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,
         7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02,
        -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,
         7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02,
        -1.0000e+09,  7.7928e-02

Tokens embeddings:
tensor([ 3.3173e-02,  1.7152e-02,  8.6060e-01, -1.8571e-02,  2.1326e-01,
        -3.1070e-02,  1.5625e-01, -2.2231e-02,  1.7420e-01, -7.6728e-02,
         8.0831e-02,  9.4834e-02,  6.3272e-02,  5.3537e-01,  8.9564e-02,
        -1.7184e-02,  4.3060e-02,  1.5625e-01,  6.8436e-02, -1.5793e-02,
         4.2367e-01,  1.4391e-02,  9.5352e-02,  6.0246e-01,  6.8436e-02,
         1.7420e-01,  2.8717e-01,  2.2010e-02,  1.1194e+00,  3.1240e-02,
         4.6943e-02,  1.0792e-01,  8.6283e-02,  8.6060e-01,  3.2138e-01,
        -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,
         7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02,
        -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,
         7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02,
        -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,
         7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02,
        -1.0000e+09,  7.7928e-02

Tokens embeddings:
tensor([ 3.3173e-02,  1.7152e-02,  8.6060e-01, -1.8571e-02,  2.1326e-01,
        -3.1070e-02,  1.5625e-01, -2.2231e-02,  1.7420e-01, -7.6728e-02,
         8.0831e-02,  9.4834e-02,  6.3272e-02,  5.3537e-01,  8.9564e-02,
        -1.7184e-02,  4.3060e-02,  1.5625e-01,  6.8436e-02, -1.5793e-02,
         4.2367e-01,  1.4391e-02,  9.5352e-02,  6.0246e-01,  6.8436e-02,
         1.7420e-01,  2.8717e-01,  2.2010e-02,  1.1194e+00,  3.1240e-02,
         4.6943e-02,  1.0792e-01,  8.6283e-02,  8.6060e-01,  3.2138e-01,
        -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,
         7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02,
        -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,
         7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02,
        -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,
         7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02,
        -1.0000e+09,  7.7928e-02

Tokens embeddings:
tensor([ 3.3173e-02, -7.3193e-02,  1.4351e-01,  1.1194e+00,  2.9773e-01,
        -4.8523e-02,  3.1000e-01,  1.8335e-01,  4.6837e-01,  2.9773e-01,
         1.7608e-01, -7.6778e-03,  1.9124e-01,  2.4391e-01,  2.9773e-01,
         4.2890e-02,  4.4977e-01,  2.1475e-01,  6.5667e-02, -1.6719e-03,
         6.3320e-01,  1.1194e+00,  3.2815e-02,  3.3428e-01,  2.0548e-01,
         2.0108e-01,  5.0742e-02,  4.1328e-01,  8.2697e-02, -2.0249e-02,
         8.3316e-02,  1.0153e-01,  9.4834e-02,  1.4764e+00,  2.5723e-01,
         9.7122e-01,  6.1239e-02, -2.7676e-02,  8.6060e-01, -1.9811e-02,
         3.2138e-01, -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02,
        -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,
         7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02,
        -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,
         7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02,
        -2.1756e-03,  7.7928e-02

Tokens embeddings:
tensor([ 3.3173e-02, -7.3193e-02,  1.4351e-01,  1.1194e+00,  2.9773e-01,
        -4.8523e-02,  3.1000e-01,  1.8335e-01,  4.6837e-01,  2.9773e-01,
         1.7608e-01, -7.6778e-03,  1.9124e-01,  2.4391e-01,  2.9773e-01,
         4.2890e-02,  4.4977e-01,  2.1475e-01,  6.5667e-02, -1.6719e-03,
         6.3320e-01,  1.1194e+00,  3.2815e-02,  3.3428e-01,  2.0548e-01,
         2.0108e-01,  5.0742e-02,  4.1328e-01,  8.2697e-02, -2.0249e-02,
         8.3316e-02,  1.0153e-01,  9.4834e-02,  1.4764e+00,  2.5723e-01,
         9.7122e-01,  6.1239e-02, -2.7676e-02,  8.6060e-01, -1.9811e-02,
         3.2138e-01, -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02,
        -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,
         7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02,
        -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,
         7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02,
        -2.1756e-03,  7.7928e-02

Tokens embeddings:
tensor([ 3.3173e-02, -7.3193e-02,  1.4351e-01,  1.1194e+00,  2.9773e-01,
        -4.8523e-02,  3.1000e-01,  1.8335e-01,  4.6837e-01,  2.9773e-01,
         1.7608e-01, -7.6778e-03,  1.9124e-01,  2.4391e-01,  2.9773e-01,
         4.2890e-02,  4.4977e-01,  2.1475e-01,  6.5667e-02, -1.6719e-03,
         6.3320e-01,  1.1194e+00,  3.2815e-02,  3.3428e-01,  2.0548e-01,
         2.0108e-01,  5.0742e-02,  4.1328e-01,  8.2697e-02, -2.0249e-02,
         8.3316e-02,  1.0153e-01,  9.4834e-02,  1.4764e+00,  2.5723e-01,
         9.7122e-01,  6.1239e-02, -2.7676e-02,  8.6060e-01, -1.9811e-02,
         3.2138e-01, -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02,
        -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,
         7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02,
        -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,
         7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02,
        -2.1756e-03,  7.7928e-02

Tokens embeddings:
tensor([ 3.3173e-02, -7.3193e-02,  1.4351e-01,  1.1194e+00,  2.9773e-01,
        -4.8523e-02,  3.1000e-01,  1.8335e-01,  4.6837e-01,  2.9773e-01,
         1.7608e-01, -7.6778e-03,  1.9124e-01,  2.4391e-01,  2.9773e-01,
         4.2890e-02,  4.4977e-01,  2.1475e-01,  6.5667e-02, -1.6719e-03,
         6.3320e-01,  1.1194e+00,  3.2815e-02,  3.3428e-01,  2.0548e-01,
         2.0108e-01,  5.0742e-02,  4.1328e-01,  8.2697e-02, -2.0249e-02,
         8.3316e-02,  1.0153e-01,  9.4834e-02,  1.4764e+00,  2.5723e-01,
         9.7122e-01,  6.1239e-02, -2.7676e-02,  8.6060e-01, -1.9811e-02,
         3.2138e-01, -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02,
        -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,
         7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02,
        -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,
         7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02,
        -2.1756e-03,  7.7928e-02

Tokens embeddings:
tensor([ 3.3173e-02, -7.3193e-02,  1.4351e-01,  1.1194e+00,  2.9773e-01,
        -4.8523e-02,  3.1000e-01,  1.8335e-01,  4.6837e-01,  2.9773e-01,
         1.7608e-01, -7.6778e-03,  1.9124e-01,  2.4391e-01,  2.9773e-01,
         4.2890e-02,  4.4977e-01,  2.1475e-01,  6.5667e-02, -1.6719e-03,
         6.3320e-01,  1.1194e+00,  3.2815e-02,  3.3428e-01,  2.0548e-01,
         2.0108e-01,  5.0742e-02,  4.1328e-01,  8.2697e-02, -2.0249e-02,
         8.3316e-02,  1.0153e-01,  9.4834e-02,  1.4764e+00,  2.5723e-01,
         9.7122e-01,  6.1239e-02, -2.7676e-02,  8.6060e-01, -1.9811e-02,
         3.2138e-01, -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02,
        -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,
         7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02,
        -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,
         7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02,
        -2.1756e-03,  7.7928e-02

Tokens embeddings:
tensor([ 3.3173e-02, -7.3193e-02,  1.4351e-01,  1.1194e+00,  2.9773e-01,
        -4.8523e-02,  3.1000e-01,  1.8335e-01,  4.6837e-01,  2.9773e-01,
         1.7608e-01, -7.6778e-03,  1.9124e-01,  2.4391e-01,  2.9773e-01,
         4.2890e-02,  4.4977e-01,  2.1475e-01,  6.5667e-02, -1.6719e-03,
         6.3320e-01,  1.1194e+00,  3.2815e-02,  3.3428e-01,  2.0548e-01,
         2.0108e-01,  5.0742e-02,  4.1328e-01,  8.2697e-02, -2.0249e-02,
         8.3316e-02,  1.0153e-01,  9.4834e-02,  1.4764e+00,  2.5723e-01,
         9.7122e-01,  6.1239e-02, -2.7676e-02,  8.6060e-01, -1.9811e-02,
         3.2138e-01, -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02,
        -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,
         7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02,
        -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,
         7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02,
        -2.1756e-03,  7.7928e-02

Tokens embeddings:
tensor([ 3.3173e-02, -7.3193e-02,  1.4351e-01,  1.1194e+00,  2.9773e-01,
        -4.8523e-02,  3.1000e-01,  1.8335e-01,  4.6837e-01,  2.9773e-01,
         1.7608e-01, -7.6778e-03,  1.9124e-01,  2.4391e-01,  2.9773e-01,
         4.2890e-02,  4.4977e-01,  2.1475e-01,  6.5667e-02, -1.6719e-03,
         6.3320e-01,  1.1194e+00,  3.2815e-02,  3.3428e-01,  2.0548e-01,
         2.0108e-01,  5.0742e-02,  4.1328e-01,  8.2697e-02, -2.0249e-02,
         8.3316e-02,  1.0153e-01,  9.4834e-02,  1.4764e+00,  2.5723e-01,
         9.7122e-01,  6.1239e-02, -2.7676e-02,  8.6060e-01, -1.9811e-02,
         3.2138e-01, -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02,
        -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,
         7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02,
        -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,
         7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02,
        -2.1756e-03,  7.7928e-02

Tokens embeddings:
tensor([ 3.3173e-02, -7.3193e-02,  1.4351e-01,  1.1194e+00,  2.9773e-01,
        -4.8523e-02,  3.1000e-01,  1.8335e-01,  4.6837e-01,  2.9773e-01,
         1.7608e-01, -7.6778e-03,  1.9124e-01,  2.4391e-01,  2.9773e-01,
         4.2890e-02,  4.4977e-01,  2.1475e-01,  6.5667e-02, -1.6719e-03,
         6.3320e-01,  1.1194e+00,  3.2815e-02,  3.3428e-01,  2.0548e-01,
         2.0108e-01,  5.0742e-02,  4.1328e-01,  8.2697e-02, -2.0249e-02,
         8.3316e-02,  1.0153e-01,  9.4834e-02,  1.4764e+00,  2.5723e-01,
         9.7122e-01,  6.1239e-02, -2.7676e-02,  8.6060e-01, -1.9811e-02,
         3.2138e-01, -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02,
        -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,
         7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02,
        -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,
         7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02,
        -2.1756e-03,  7.7928e-02

Tokens embeddings:
tensor([ 3.3173e-02, -7.3193e-02,  1.4351e-01,  1.1194e+00,  2.9773e-01,
        -4.8523e-02,  3.1000e-01,  1.8335e-01,  4.6837e-01,  2.9773e-01,
         1.7608e-01, -7.6778e-03,  1.9124e-01,  2.4391e-01,  2.9773e-01,
         4.2890e-02,  4.4977e-01,  2.1475e-01,  6.5667e-02, -1.6719e-03,
         6.3320e-01,  1.1194e+00,  3.2815e-02,  3.3428e-01,  2.0548e-01,
         2.0108e-01,  5.0742e-02,  4.1328e-01,  8.2697e-02, -2.0249e-02,
         8.3316e-02,  1.0153e-01,  9.4834e-02,  1.4764e+00,  2.5723e-01,
         9.7122e-01,  6.1239e-02, -2.7676e-02,  8.6060e-01, -1.9811e-02,
         3.2138e-01, -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02,
        -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,
         7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02,
        -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,
         7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02,
        -2.1756e-03,  7.7928e-02

Tokens embeddings:
tensor([ 3.3173e-02, -7.3193e-02,  1.4351e-01,  1.1194e+00,  2.9773e-01,
        -4.8523e-02,  3.1000e-01,  1.8335e-01,  4.6837e-01,  2.9773e-01,
         1.7608e-01, -7.6778e-03,  1.9124e-01,  2.4391e-01,  2.9773e-01,
         4.2890e-02,  4.4977e-01,  2.1475e-01,  6.5667e-02, -1.6719e-03,
         6.3320e-01,  1.1194e+00,  3.2815e-02,  3.3428e-01,  2.0548e-01,
         2.0108e-01,  5.0742e-02,  4.1328e-01,  8.2697e-02, -2.0249e-02,
         8.3316e-02,  1.0153e-01,  9.4834e-02,  1.4764e+00,  2.5723e-01,
         9.7122e-01,  6.1239e-02, -2.7676e-02,  8.6060e-01, -1.9811e-02,
         3.2138e-01, -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02,
        -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,
         7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02,
        -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,
         7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02,
        -2.1756e-03,  7.7928e-02

Tokens embeddings:
tensor([ 3.3173e-02, -7.3193e-02,  1.4351e-01,  1.1194e+00,  2.9773e-01,
        -4.8523e-02,  3.1000e-01,  1.8335e-01,  4.6837e-01,  2.9773e-01,
         1.7608e-01, -7.6778e-03,  1.9124e-01,  2.4391e-01,  2.9773e-01,
         4.2890e-02,  4.4977e-01,  2.1475e-01,  6.5667e-02, -1.6719e-03,
         6.3320e-01,  1.1194e+00,  3.2815e-02,  3.3428e-01,  2.0548e-01,
         2.0108e-01,  5.0742e-02,  4.1328e-01,  8.2697e-02, -2.0249e-02,
         8.3316e-02,  1.0153e-01,  9.4834e-02,  1.4764e+00,  2.5723e-01,
         9.7122e-01,  6.1239e-02, -2.7676e-02,  8.6060e-01, -1.9811e-02,
         3.2138e-01, -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02,
        -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,
         7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02,
        -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,
         7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02,
        -2.1756e-03,  7.7928e-02

Tokens embeddings:
tensor([ 3.3173e-02, -7.3193e-02,  1.4351e-01,  1.1194e+00,  2.9773e-01,
        -4.8523e-02,  3.1000e-01,  1.8335e-01,  4.6837e-01,  2.9773e-01,
         1.7608e-01, -7.6778e-03,  1.9124e-01,  2.4391e-01,  2.9773e-01,
         4.2890e-02,  4.4977e-01,  2.1475e-01,  6.5667e-02, -1.6719e-03,
         6.3320e-01,  1.1194e+00,  3.2815e-02,  3.3428e-01,  2.0548e-01,
         2.0108e-01,  5.0742e-02,  4.1328e-01,  8.2697e-02, -2.0249e-02,
         8.3316e-02,  1.0153e-01,  9.4834e-02,  1.4764e+00,  2.5723e-01,
         9.7122e-01,  6.1239e-02, -2.7676e-02,  8.6060e-01, -1.9811e-02,
         3.2138e-01, -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02,
        -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,
         7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02,
        -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,
         7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02,
        -2.1756e-03,  7.7928e-02

Tokens embeddings:
tensor([ 3.3173e-02, -6.8531e-03,  8.5674e-02,  1.7762e-01,  5.6638e-01,
         5.2340e-01,  1.4715e-01,  9.3092e-02,  5.1679e-01,  1.2012e-01,
         2.5795e-01, -3.1702e-02,  2.4864e-02, -2.3533e-02,  8.6283e-02,
         3.6311e-02,  2.5204e-02,  4.7042e-01,  5.1363e-02,  9.9946e-02,
         3.1037e-01,  8.9674e-02,  5.0742e-02,  1.1194e+00,  3.1000e-01,
        -1.0877e-02,  3.6829e-02,  2.6615e-01,  1.7075e-01, -1.0000e+09,
         6.0246e-01,  2.6615e-01,  1.0403e-01, -2.7676e-02,  8.6060e-01,
         3.2138e-01,  7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,
         7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02,
        -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,
         7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02,
        -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,
         7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02,
        -2.1756e-03,  7.7928e-02

Tokens embeddings:
tensor([ 3.3173e-02, -6.8531e-03,  8.5674e-02,  1.7762e-01,  5.6638e-01,
         5.2340e-01,  1.4715e-01,  9.3092e-02,  5.1679e-01,  1.2012e-01,
         2.5795e-01, -3.1702e-02,  2.4864e-02, -2.3533e-02,  8.6283e-02,
         3.6311e-02,  2.5204e-02,  4.7042e-01,  5.1363e-02,  9.9946e-02,
         3.1037e-01,  8.9674e-02,  5.0742e-02,  1.1194e+00,  3.1000e-01,
        -1.0877e-02,  3.6829e-02,  2.6615e-01,  1.7075e-01, -1.0000e+09,
         6.0246e-01,  2.6615e-01,  1.0403e-01, -2.7676e-02,  8.6060e-01,
         3.2138e-01,  7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,
         7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02,
        -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,
         7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02,
        -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,
         7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02,
        -2.1756e-03,  7.7928e-02

Tokens embeddings:
tensor([ 3.3173e-02, -6.8531e-03,  8.5674e-02,  1.7762e-01,  5.6638e-01,
         5.2340e-01,  1.4715e-01,  9.3092e-02,  5.1679e-01,  1.2012e-01,
         2.5795e-01, -3.1702e-02,  2.4864e-02, -2.3533e-02,  8.6283e-02,
         3.6311e-02,  2.5204e-02,  4.7042e-01,  5.1363e-02,  9.9946e-02,
         3.1037e-01,  8.9674e-02,  5.0742e-02,  1.1194e+00,  3.1000e-01,
        -1.0877e-02,  3.6829e-02,  2.6615e-01,  1.7075e-01, -1.0000e+09,
         6.0246e-01,  2.6615e-01,  1.0403e-01, -2.7676e-02,  8.6060e-01,
         3.2138e-01,  7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,
         7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02,
        -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,
         7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02,
        -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,
         7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02,
        -2.1756e-03,  7.7928e-02

Tokens embeddings:
tensor([ 3.3173e-02, -6.8531e-03,  8.5674e-02,  1.7762e-01,  5.6638e-01,
         5.2340e-01,  1.4715e-01,  9.3092e-02,  5.1679e-01,  1.2012e-01,
         2.5795e-01, -3.1702e-02,  2.4864e-02, -2.3533e-02,  8.6283e-02,
         3.6311e-02,  2.5204e-02,  4.7042e-01,  5.1363e-02,  9.9946e-02,
         3.1037e-01,  8.9674e-02,  5.0742e-02,  1.1194e+00,  3.1000e-01,
        -1.0877e-02,  3.6829e-02,  2.6615e-01,  1.7075e-01, -1.0000e+09,
         6.0246e-01,  2.6615e-01,  1.0403e-01, -2.7676e-02,  8.6060e-01,
         3.2138e-01,  7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,
         7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02,
        -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,
         7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02,
        -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,
         7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02,
        -2.1756e-03,  7.7928e-02

Tokens embeddings:
tensor([ 3.3173e-02, -6.8531e-03,  8.5674e-02,  1.7762e-01,  5.6638e-01,
         5.2340e-01,  1.4715e-01,  9.3092e-02,  5.1679e-01,  1.2012e-01,
         2.5795e-01, -3.1702e-02,  2.4864e-02, -2.3533e-02,  8.6283e-02,
         3.6311e-02,  2.5204e-02,  4.7042e-01,  5.1363e-02,  9.9946e-02,
         3.1037e-01,  8.9674e-02,  5.0742e-02,  1.1194e+00,  3.1000e-01,
        -1.0877e-02,  3.6829e-02,  2.6615e-01,  1.7075e-01, -1.0000e+09,
         6.0246e-01,  2.6615e-01,  1.0403e-01, -2.7676e-02,  8.6060e-01,
         3.2138e-01,  7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,
         7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02,
        -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,
         7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02,
        -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,
         7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02,
        -2.1756e-03,  7.7928e-02

Tokens embeddings:
tensor([ 3.3173e-02, -6.8531e-03,  8.5674e-02,  1.7762e-01,  5.6638e-01,
         5.2340e-01,  1.4715e-01,  9.3092e-02,  5.1679e-01,  1.2012e-01,
         2.5795e-01, -3.1702e-02,  2.4864e-02, -2.3533e-02,  8.6283e-02,
         3.6311e-02,  2.5204e-02,  4.7042e-01,  5.1363e-02,  9.9946e-02,
         3.1037e-01,  8.9674e-02,  5.0742e-02,  1.1194e+00,  3.1000e-01,
        -1.0877e-02,  3.6829e-02,  2.6615e-01,  1.7075e-01, -1.0000e+09,
         6.0246e-01,  2.6615e-01,  1.0403e-01, -2.7676e-02,  8.6060e-01,
         3.2138e-01,  7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,
         7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02,
        -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,
         7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02,
        -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,
         7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02,
        -2.1756e-03,  7.7928e-02

Tokens embeddings:
tensor([ 3.3173e-02, -6.8531e-03,  8.5674e-02,  1.7762e-01,  5.6638e-01,
         5.2340e-01,  1.4715e-01,  9.3092e-02,  5.1679e-01,  1.2012e-01,
         2.5795e-01, -3.1702e-02,  2.4864e-02, -2.3533e-02,  8.6283e-02,
         3.6311e-02,  2.5204e-02,  4.7042e-01,  5.1363e-02,  9.9946e-02,
         3.1037e-01,  8.9674e-02,  5.0742e-02,  1.1194e+00,  3.1000e-01,
        -1.0877e-02,  3.6829e-02,  2.6615e-01,  1.7075e-01, -1.0000e+09,
         6.0246e-01,  2.6615e-01,  1.0403e-01, -2.7676e-02,  8.6060e-01,
         3.2138e-01,  7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,
         7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02,
        -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,
         7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02,
        -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,
         7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02,
        -2.1756e-03,  7.7928e-02

Tokens embeddings:
tensor([ 3.3173e-02, -6.8531e-03,  8.5674e-02,  1.7762e-01,  5.6638e-01,
         5.2340e-01,  1.4715e-01,  9.3092e-02,  5.1679e-01,  1.2012e-01,
         2.5795e-01, -3.1702e-02,  2.4864e-02, -2.3533e-02,  8.6283e-02,
         3.6311e-02,  2.5204e-02,  4.7042e-01,  5.1363e-02,  9.9946e-02,
         3.1037e-01,  8.9674e-02,  5.0742e-02,  1.1194e+00,  3.1000e-01,
        -1.0877e-02,  3.6829e-02,  2.6615e-01,  1.7075e-01, -1.0000e+09,
         6.0246e-01,  2.6615e-01,  1.0403e-01, -2.7676e-02,  8.6060e-01,
         3.2138e-01,  7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,
         7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02,
        -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,
         7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02,
        -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,
         7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02,
        -2.1756e-03,  7.7928e-02

Tokens embeddings:
tensor([ 3.3173e-02, -6.8531e-03,  8.5674e-02,  1.7762e-01,  5.6638e-01,
         5.2340e-01,  1.4715e-01,  9.3092e-02,  5.1679e-01,  1.2012e-01,
         2.5795e-01, -3.1702e-02,  2.4864e-02, -2.3533e-02,  8.6283e-02,
         3.6311e-02,  2.5204e-02,  4.7042e-01,  5.1363e-02,  9.9946e-02,
         3.1037e-01,  8.9674e-02,  5.0742e-02,  1.1194e+00,  3.1000e-01,
        -1.0877e-02,  3.6829e-02,  2.6615e-01,  1.7075e-01, -1.0000e+09,
         6.0246e-01,  2.6615e-01,  1.0403e-01, -2.7676e-02,  8.6060e-01,
         3.2138e-01,  7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,
         7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02,
        -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,
         7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02,
        -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,
         7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02,
        -2.1756e-03,  7.7928e-02

Tokens embeddings:
tensor([ 3.3173e-02, -6.8531e-03,  8.5674e-02,  1.7762e-01,  5.6638e-01,
         5.2340e-01,  1.4715e-01,  9.3092e-02,  5.1679e-01,  1.2012e-01,
         2.5795e-01, -3.1702e-02,  2.4864e-02, -2.3533e-02,  8.6283e-02,
         3.6311e-02,  2.5204e-02,  4.7042e-01,  5.1363e-02,  9.9946e-02,
         3.1037e-01,  8.9674e-02,  5.0742e-02,  1.1194e+00,  3.1000e-01,
        -1.0877e-02,  3.6829e-02,  2.6615e-01,  1.7075e-01, -1.0000e+09,
         6.0246e-01,  2.6615e-01,  1.0403e-01, -2.7676e-02,  8.6060e-01,
         3.2138e-01,  7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,
         7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02,
        -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,
         7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02,
        -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,
         7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02,
        -2.1756e-03,  7.7928e-02

Tokens embeddings:
tensor([ 3.3173e-02, -6.8531e-03,  8.5674e-02,  1.7762e-01,  5.6638e-01,
         5.2340e-01,  1.4715e-01,  9.3092e-02,  5.1679e-01,  1.2012e-01,
         2.5795e-01, -3.1702e-02,  2.4864e-02, -2.3533e-02,  8.6283e-02,
         3.6311e-02,  2.5204e-02,  4.7042e-01,  5.1363e-02,  9.9946e-02,
         3.1037e-01,  8.9674e-02,  5.0742e-02,  1.1194e+00,  3.1000e-01,
        -1.0877e-02,  3.6829e-02,  2.6615e-01,  1.7075e-01, -1.0000e+09,
         6.0246e-01,  2.6615e-01,  1.0403e-01, -2.7676e-02,  8.6060e-01,
         3.2138e-01,  7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,
         7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02,
        -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,
         7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02,
        -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,
         7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02,
        -2.1756e-03,  7.7928e-02

Tokens embeddings:
tensor([ 3.3173e-02, -6.8531e-03,  8.5674e-02,  1.7762e-01,  5.6638e-01,
         5.2340e-01,  1.4715e-01,  9.3092e-02,  5.1679e-01,  1.2012e-01,
         2.5795e-01, -3.1702e-02,  2.4864e-02, -2.3533e-02,  8.6283e-02,
         3.6311e-02,  2.5204e-02,  4.7042e-01,  5.1363e-02,  9.9946e-02,
         3.1037e-01,  8.9674e-02,  5.0742e-02,  1.1194e+00,  3.1000e-01,
        -1.0877e-02,  3.6829e-02,  2.6615e-01,  1.7075e-01, -1.0000e+09,
         6.0246e-01,  2.6615e-01,  1.0403e-01, -2.7676e-02,  8.6060e-01,
         3.2138e-01,  7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,
         7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02,
        -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,
         7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02,
        -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,
         7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02,
        -2.1756e-03,  7.7928e-02

In [27]:
# Select some sentences randomly
sent_index = [11,12,13,14,15,16,17,18,19,20]

for s in sent_index:
  print("*" * 100)
  # Get the sentence's words
  tokens = test_inputs[s]
  # For each layer...
  for l in range(12):
    print("\nLayer", l+1)
    attention = np.squeeze(test_attentions[s][l].detach().cpu().numpy(), axis=0)
    # and for each head
    #print("Head", h+1)
    # Get the attention for the cls token
    encoded_tokens = bert_tokenizer(tokens, truncation=True, padding=True, max_length=128, return_tensors='pt')
    encoded_tokens = encoded_tokens.to(device)
    with torch.no_grad():
      model_output1 = model_e(**encoded_tokens)
      tokens_embeddings = max_pooling(model_output1, encoded_tokens['attention_mask'])
      tokens_embeddings = tokens_embeddings.cpu()
      cls_attentions = head[0]
      display(HTML(colorize(tokens, cls_attentions)))
      print("Tokens embeddings:")
      print(tokens_embeddings)
      arg = argmax(tokens_embeddings)
      print('arg max of %s: %d' % (tokens_embeddings, arg))

****************************************************************************************************

Layer 1


Tokens embeddings:
tensor([ 3.3173e-02,  4.1790e-01,  7.7364e-02, -3.0985e-02,  5.0742e-02,
         8.6226e-01,  6.1239e-02,  2.6615e-01,  7.5485e-02,  2.3997e-01,
         6.4561e-01,  1.1841e-01,  1.5532e-01,  5.8823e-02,  8.9582e-02,
         4.1413e-03,  1.0710e-01,  1.0359e-01,  6.0246e-01,  9.0632e-03,
         7.8163e-02, -1.8067e-02,  1.8976e-01,  9.4834e-02,  2.4700e-01,
         1.5038e-01,  2.8950e-02,  1.6537e-02,  1.1194e+00, -2.7676e-02,
         5.8310e-02, -6.7728e-02,  8.6060e-01,  6.0246e-01,  1.5389e-01,
        -4.1591e-02, -7.5124e-03, -2.8679e-02,  8.6793e-02,  8.6060e-01,
         5.5468e-02,  1.2260e-01,  6.3631e-02, -2.4761e-03,  2.6615e-01,
         1.4651e-01,  2.9773e-01,  1.2796e-02,  4.4977e-01, -4.8438e-02,
         8.9918e-02,  2.2343e-01,  2.6615e-01,  3.3842e-03,  5.0742e-02,
        -2.7676e-02,  8.6060e-01,  3.2138e-01,  7.7928e-02, -1.0000e+09,
         7.7928e-02,  7.7928e-02,  7.7928e-02, -2.1756e-03,  7.7928e-02,
         7.7928e-02,  7.7928e-02

Tokens embeddings:
tensor([ 3.3173e-02,  4.1790e-01,  7.7364e-02, -3.0985e-02,  5.0742e-02,
         8.6226e-01,  6.1239e-02,  2.6615e-01,  7.5485e-02,  2.3997e-01,
         6.4561e-01,  1.1841e-01,  1.5532e-01,  5.8823e-02,  8.9582e-02,
         4.1413e-03,  1.0710e-01,  1.0359e-01,  6.0246e-01,  9.0632e-03,
         7.8163e-02, -1.8067e-02,  1.8976e-01,  9.4834e-02,  2.4700e-01,
         1.5038e-01,  2.8950e-02,  1.6537e-02,  1.1194e+00, -2.7676e-02,
         5.8310e-02, -6.7728e-02,  8.6060e-01,  6.0246e-01,  1.5389e-01,
        -4.1591e-02, -7.5124e-03, -2.8679e-02,  8.6793e-02,  8.6060e-01,
         5.5468e-02,  1.2260e-01,  6.3631e-02, -2.4761e-03,  2.6615e-01,
         1.4651e-01,  2.9773e-01,  1.2796e-02,  4.4977e-01, -4.8438e-02,
         8.9918e-02,  2.2343e-01,  2.6615e-01,  3.3842e-03,  5.0742e-02,
        -2.7676e-02,  8.6060e-01,  3.2138e-01,  7.7928e-02, -1.0000e+09,
         7.7928e-02,  7.7928e-02,  7.7928e-02, -2.1756e-03,  7.7928e-02,
         7.7928e-02,  7.7928e-02

Tokens embeddings:
tensor([ 3.3173e-02,  4.1790e-01,  7.7364e-02, -3.0985e-02,  5.0742e-02,
         8.6226e-01,  6.1239e-02,  2.6615e-01,  7.5485e-02,  2.3997e-01,
         6.4561e-01,  1.1841e-01,  1.5532e-01,  5.8823e-02,  8.9582e-02,
         4.1413e-03,  1.0710e-01,  1.0359e-01,  6.0246e-01,  9.0632e-03,
         7.8163e-02, -1.8067e-02,  1.8976e-01,  9.4834e-02,  2.4700e-01,
         1.5038e-01,  2.8950e-02,  1.6537e-02,  1.1194e+00, -2.7676e-02,
         5.8310e-02, -6.7728e-02,  8.6060e-01,  6.0246e-01,  1.5389e-01,
        -4.1591e-02, -7.5124e-03, -2.8679e-02,  8.6793e-02,  8.6060e-01,
         5.5468e-02,  1.2260e-01,  6.3631e-02, -2.4761e-03,  2.6615e-01,
         1.4651e-01,  2.9773e-01,  1.2796e-02,  4.4977e-01, -4.8438e-02,
         8.9918e-02,  2.2343e-01,  2.6615e-01,  3.3842e-03,  5.0742e-02,
        -2.7676e-02,  8.6060e-01,  3.2138e-01,  7.7928e-02, -1.0000e+09,
         7.7928e-02,  7.7928e-02,  7.7928e-02, -2.1756e-03,  7.7928e-02,
         7.7928e-02,  7.7928e-02

Tokens embeddings:
tensor([ 3.3173e-02,  4.1790e-01,  7.7364e-02, -3.0985e-02,  5.0742e-02,
         8.6226e-01,  6.1239e-02,  2.6615e-01,  7.5485e-02,  2.3997e-01,
         6.4561e-01,  1.1841e-01,  1.5532e-01,  5.8823e-02,  8.9582e-02,
         4.1413e-03,  1.0710e-01,  1.0359e-01,  6.0246e-01,  9.0632e-03,
         7.8163e-02, -1.8067e-02,  1.8976e-01,  9.4834e-02,  2.4700e-01,
         1.5038e-01,  2.8950e-02,  1.6537e-02,  1.1194e+00, -2.7676e-02,
         5.8310e-02, -6.7728e-02,  8.6060e-01,  6.0246e-01,  1.5389e-01,
        -4.1591e-02, -7.5124e-03, -2.8679e-02,  8.6793e-02,  8.6060e-01,
         5.5468e-02,  1.2260e-01,  6.3631e-02, -2.4761e-03,  2.6615e-01,
         1.4651e-01,  2.9773e-01,  1.2796e-02,  4.4977e-01, -4.8438e-02,
         8.9918e-02,  2.2343e-01,  2.6615e-01,  3.3842e-03,  5.0742e-02,
        -2.7676e-02,  8.6060e-01,  3.2138e-01,  7.7928e-02, -1.0000e+09,
         7.7928e-02,  7.7928e-02,  7.7928e-02, -2.1756e-03,  7.7928e-02,
         7.7928e-02,  7.7928e-02

Tokens embeddings:
tensor([ 3.3173e-02,  4.1790e-01,  7.7364e-02, -3.0985e-02,  5.0742e-02,
         8.6226e-01,  6.1239e-02,  2.6615e-01,  7.5485e-02,  2.3997e-01,
         6.4561e-01,  1.1841e-01,  1.5532e-01,  5.8823e-02,  8.9582e-02,
         4.1413e-03,  1.0710e-01,  1.0359e-01,  6.0246e-01,  9.0632e-03,
         7.8163e-02, -1.8067e-02,  1.8976e-01,  9.4834e-02,  2.4700e-01,
         1.5038e-01,  2.8950e-02,  1.6537e-02,  1.1194e+00, -2.7676e-02,
         5.8310e-02, -6.7728e-02,  8.6060e-01,  6.0246e-01,  1.5389e-01,
        -4.1591e-02, -7.5124e-03, -2.8679e-02,  8.6793e-02,  8.6060e-01,
         5.5468e-02,  1.2260e-01,  6.3631e-02, -2.4761e-03,  2.6615e-01,
         1.4651e-01,  2.9773e-01,  1.2796e-02,  4.4977e-01, -4.8438e-02,
         8.9918e-02,  2.2343e-01,  2.6615e-01,  3.3842e-03,  5.0742e-02,
        -2.7676e-02,  8.6060e-01,  3.2138e-01,  7.7928e-02, -1.0000e+09,
         7.7928e-02,  7.7928e-02,  7.7928e-02, -2.1756e-03,  7.7928e-02,
         7.7928e-02,  7.7928e-02

Tokens embeddings:
tensor([ 3.3173e-02,  4.1790e-01,  7.7364e-02, -3.0985e-02,  5.0742e-02,
         8.6226e-01,  6.1239e-02,  2.6615e-01,  7.5485e-02,  2.3997e-01,
         6.4561e-01,  1.1841e-01,  1.5532e-01,  5.8823e-02,  8.9582e-02,
         4.1413e-03,  1.0710e-01,  1.0359e-01,  6.0246e-01,  9.0632e-03,
         7.8163e-02, -1.8067e-02,  1.8976e-01,  9.4834e-02,  2.4700e-01,
         1.5038e-01,  2.8950e-02,  1.6537e-02,  1.1194e+00, -2.7676e-02,
         5.8310e-02, -6.7728e-02,  8.6060e-01,  6.0246e-01,  1.5389e-01,
        -4.1591e-02, -7.5124e-03, -2.8679e-02,  8.6793e-02,  8.6060e-01,
         5.5468e-02,  1.2260e-01,  6.3631e-02, -2.4761e-03,  2.6615e-01,
         1.4651e-01,  2.9773e-01,  1.2796e-02,  4.4977e-01, -4.8438e-02,
         8.9918e-02,  2.2343e-01,  2.6615e-01,  3.3842e-03,  5.0742e-02,
        -2.7676e-02,  8.6060e-01,  3.2138e-01,  7.7928e-02, -1.0000e+09,
         7.7928e-02,  7.7928e-02,  7.7928e-02, -2.1756e-03,  7.7928e-02,
         7.7928e-02,  7.7928e-02

Tokens embeddings:
tensor([ 3.3173e-02,  4.1790e-01,  7.7364e-02, -3.0985e-02,  5.0742e-02,
         8.6226e-01,  6.1239e-02,  2.6615e-01,  7.5485e-02,  2.3997e-01,
         6.4561e-01,  1.1841e-01,  1.5532e-01,  5.8823e-02,  8.9582e-02,
         4.1413e-03,  1.0710e-01,  1.0359e-01,  6.0246e-01,  9.0632e-03,
         7.8163e-02, -1.8067e-02,  1.8976e-01,  9.4834e-02,  2.4700e-01,
         1.5038e-01,  2.8950e-02,  1.6537e-02,  1.1194e+00, -2.7676e-02,
         5.8310e-02, -6.7728e-02,  8.6060e-01,  6.0246e-01,  1.5389e-01,
        -4.1591e-02, -7.5124e-03, -2.8679e-02,  8.6793e-02,  8.6060e-01,
         5.5468e-02,  1.2260e-01,  6.3631e-02, -2.4761e-03,  2.6615e-01,
         1.4651e-01,  2.9773e-01,  1.2796e-02,  4.4977e-01, -4.8438e-02,
         8.9918e-02,  2.2343e-01,  2.6615e-01,  3.3842e-03,  5.0742e-02,
        -2.7676e-02,  8.6060e-01,  3.2138e-01,  7.7928e-02, -1.0000e+09,
         7.7928e-02,  7.7928e-02,  7.7928e-02, -2.1756e-03,  7.7928e-02,
         7.7928e-02,  7.7928e-02

Tokens embeddings:
tensor([ 3.3173e-02,  4.1790e-01,  7.7364e-02, -3.0985e-02,  5.0742e-02,
         8.6226e-01,  6.1239e-02,  2.6615e-01,  7.5485e-02,  2.3997e-01,
         6.4561e-01,  1.1841e-01,  1.5532e-01,  5.8823e-02,  8.9582e-02,
         4.1413e-03,  1.0710e-01,  1.0359e-01,  6.0246e-01,  9.0632e-03,
         7.8163e-02, -1.8067e-02,  1.8976e-01,  9.4834e-02,  2.4700e-01,
         1.5038e-01,  2.8950e-02,  1.6537e-02,  1.1194e+00, -2.7676e-02,
         5.8310e-02, -6.7728e-02,  8.6060e-01,  6.0246e-01,  1.5389e-01,
        -4.1591e-02, -7.5124e-03, -2.8679e-02,  8.6793e-02,  8.6060e-01,
         5.5468e-02,  1.2260e-01,  6.3631e-02, -2.4761e-03,  2.6615e-01,
         1.4651e-01,  2.9773e-01,  1.2796e-02,  4.4977e-01, -4.8438e-02,
         8.9918e-02,  2.2343e-01,  2.6615e-01,  3.3842e-03,  5.0742e-02,
        -2.7676e-02,  8.6060e-01,  3.2138e-01,  7.7928e-02, -1.0000e+09,
         7.7928e-02,  7.7928e-02,  7.7928e-02, -2.1756e-03,  7.7928e-02,
         7.7928e-02,  7.7928e-02

Tokens embeddings:
tensor([ 3.3173e-02,  4.1790e-01,  7.7364e-02, -3.0985e-02,  5.0742e-02,
         8.6226e-01,  6.1239e-02,  2.6615e-01,  7.5485e-02,  2.3997e-01,
         6.4561e-01,  1.1841e-01,  1.5532e-01,  5.8823e-02,  8.9582e-02,
         4.1413e-03,  1.0710e-01,  1.0359e-01,  6.0246e-01,  9.0632e-03,
         7.8163e-02, -1.8067e-02,  1.8976e-01,  9.4834e-02,  2.4700e-01,
         1.5038e-01,  2.8950e-02,  1.6537e-02,  1.1194e+00, -2.7676e-02,
         5.8310e-02, -6.7728e-02,  8.6060e-01,  6.0246e-01,  1.5389e-01,
        -4.1591e-02, -7.5124e-03, -2.8679e-02,  8.6793e-02,  8.6060e-01,
         5.5468e-02,  1.2260e-01,  6.3631e-02, -2.4761e-03,  2.6615e-01,
         1.4651e-01,  2.9773e-01,  1.2796e-02,  4.4977e-01, -4.8438e-02,
         8.9918e-02,  2.2343e-01,  2.6615e-01,  3.3842e-03,  5.0742e-02,
        -2.7676e-02,  8.6060e-01,  3.2138e-01,  7.7928e-02, -1.0000e+09,
         7.7928e-02,  7.7928e-02,  7.7928e-02, -2.1756e-03,  7.7928e-02,
         7.7928e-02,  7.7928e-02

Tokens embeddings:
tensor([ 3.3173e-02,  4.1790e-01,  7.7364e-02, -3.0985e-02,  5.0742e-02,
         8.6226e-01,  6.1239e-02,  2.6615e-01,  7.5485e-02,  2.3997e-01,
         6.4561e-01,  1.1841e-01,  1.5532e-01,  5.8823e-02,  8.9582e-02,
         4.1413e-03,  1.0710e-01,  1.0359e-01,  6.0246e-01,  9.0632e-03,
         7.8163e-02, -1.8067e-02,  1.8976e-01,  9.4834e-02,  2.4700e-01,
         1.5038e-01,  2.8950e-02,  1.6537e-02,  1.1194e+00, -2.7676e-02,
         5.8310e-02, -6.7728e-02,  8.6060e-01,  6.0246e-01,  1.5389e-01,
        -4.1591e-02, -7.5124e-03, -2.8679e-02,  8.6793e-02,  8.6060e-01,
         5.5468e-02,  1.2260e-01,  6.3631e-02, -2.4761e-03,  2.6615e-01,
         1.4651e-01,  2.9773e-01,  1.2796e-02,  4.4977e-01, -4.8438e-02,
         8.9918e-02,  2.2343e-01,  2.6615e-01,  3.3842e-03,  5.0742e-02,
        -2.7676e-02,  8.6060e-01,  3.2138e-01,  7.7928e-02, -1.0000e+09,
         7.7928e-02,  7.7928e-02,  7.7928e-02, -2.1756e-03,  7.7928e-02,
         7.7928e-02,  7.7928e-02

Tokens embeddings:
tensor([ 3.3173e-02,  4.1790e-01,  7.7364e-02, -3.0985e-02,  5.0742e-02,
         8.6226e-01,  6.1239e-02,  2.6615e-01,  7.5485e-02,  2.3997e-01,
         6.4561e-01,  1.1841e-01,  1.5532e-01,  5.8823e-02,  8.9582e-02,
         4.1413e-03,  1.0710e-01,  1.0359e-01,  6.0246e-01,  9.0632e-03,
         7.8163e-02, -1.8067e-02,  1.8976e-01,  9.4834e-02,  2.4700e-01,
         1.5038e-01,  2.8950e-02,  1.6537e-02,  1.1194e+00, -2.7676e-02,
         5.8310e-02, -6.7728e-02,  8.6060e-01,  6.0246e-01,  1.5389e-01,
        -4.1591e-02, -7.5124e-03, -2.8679e-02,  8.6793e-02,  8.6060e-01,
         5.5468e-02,  1.2260e-01,  6.3631e-02, -2.4761e-03,  2.6615e-01,
         1.4651e-01,  2.9773e-01,  1.2796e-02,  4.4977e-01, -4.8438e-02,
         8.9918e-02,  2.2343e-01,  2.6615e-01,  3.3842e-03,  5.0742e-02,
        -2.7676e-02,  8.6060e-01,  3.2138e-01,  7.7928e-02, -1.0000e+09,
         7.7928e-02,  7.7928e-02,  7.7928e-02, -2.1756e-03,  7.7928e-02,
         7.7928e-02,  7.7928e-02

Tokens embeddings:
tensor([ 3.3173e-02,  4.1790e-01,  7.7364e-02, -3.0985e-02,  5.0742e-02,
         8.6226e-01,  6.1239e-02,  2.6615e-01,  7.5485e-02,  2.3997e-01,
         6.4561e-01,  1.1841e-01,  1.5532e-01,  5.8823e-02,  8.9582e-02,
         4.1413e-03,  1.0710e-01,  1.0359e-01,  6.0246e-01,  9.0632e-03,
         7.8163e-02, -1.8067e-02,  1.8976e-01,  9.4834e-02,  2.4700e-01,
         1.5038e-01,  2.8950e-02,  1.6537e-02,  1.1194e+00, -2.7676e-02,
         5.8310e-02, -6.7728e-02,  8.6060e-01,  6.0246e-01,  1.5389e-01,
        -4.1591e-02, -7.5124e-03, -2.8679e-02,  8.6793e-02,  8.6060e-01,
         5.5468e-02,  1.2260e-01,  6.3631e-02, -2.4761e-03,  2.6615e-01,
         1.4651e-01,  2.9773e-01,  1.2796e-02,  4.4977e-01, -4.8438e-02,
         8.9918e-02,  2.2343e-01,  2.6615e-01,  3.3842e-03,  5.0742e-02,
        -2.7676e-02,  8.6060e-01,  3.2138e-01,  7.7928e-02, -1.0000e+09,
         7.7928e-02,  7.7928e-02,  7.7928e-02, -2.1756e-03,  7.7928e-02,
         7.7928e-02,  7.7928e-02

Tokens embeddings:
tensor([ 3.3173e-02,  2.5993e-01,  4.4977e-01,  2.9773e-01,  7.1086e-02,
        -2.0813e-02,  1.0382e-01,  3.7179e-02,  5.0742e-02,  9.4834e-02,
         2.9773e-01, -9.2530e-02,  7.6094e-03,  8.4558e-02,  2.9773e-01,
         1.4050e-01,  8.6283e-02,  1.0056e-02,  4.3060e-02, -1.4160e-02,
         4.8553e-02, -2.1518e-03,  4.4021e-02,  5.0068e-01,  8.2257e-02,
        -2.1518e-03,  3.1495e-02,  2.6615e-01,  1.4651e-01,  2.4700e-01,
         4.4130e-02, -1.5069e-02,  9.4834e-02, -1.0000e+09,  4.3746e-02,
         6.0306e-02,  8.6283e-02,  8.6060e-01,  3.2138e-01, -2.1756e-03,
         7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02,
        -2.1756e-03,  7.7928e-02,  7.7928e-02,  7.7928e-02, -2.1756e-03,
         7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02,
        -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,
         7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02,
        -2.1756e-03,  7.7928e-02

Tokens embeddings:
tensor([ 3.3173e-02,  2.5993e-01,  4.4977e-01,  2.9773e-01,  7.1086e-02,
        -2.0813e-02,  1.0382e-01,  3.7179e-02,  5.0742e-02,  9.4834e-02,
         2.9773e-01, -9.2530e-02,  7.6094e-03,  8.4558e-02,  2.9773e-01,
         1.4050e-01,  8.6283e-02,  1.0056e-02,  4.3060e-02, -1.4160e-02,
         4.8553e-02, -2.1518e-03,  4.4021e-02,  5.0068e-01,  8.2257e-02,
        -2.1518e-03,  3.1495e-02,  2.6615e-01,  1.4651e-01,  2.4700e-01,
         4.4130e-02, -1.5069e-02,  9.4834e-02, -1.0000e+09,  4.3746e-02,
         6.0306e-02,  8.6283e-02,  8.6060e-01,  3.2138e-01, -2.1756e-03,
         7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02,
        -2.1756e-03,  7.7928e-02,  7.7928e-02,  7.7928e-02, -2.1756e-03,
         7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02,
        -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,
         7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02,
        -2.1756e-03,  7.7928e-02

Tokens embeddings:
tensor([ 3.3173e-02,  2.5993e-01,  4.4977e-01,  2.9773e-01,  7.1086e-02,
        -2.0813e-02,  1.0382e-01,  3.7179e-02,  5.0742e-02,  9.4834e-02,
         2.9773e-01, -9.2530e-02,  7.6094e-03,  8.4558e-02,  2.9773e-01,
         1.4050e-01,  8.6283e-02,  1.0056e-02,  4.3060e-02, -1.4160e-02,
         4.8553e-02, -2.1518e-03,  4.4021e-02,  5.0068e-01,  8.2257e-02,
        -2.1518e-03,  3.1495e-02,  2.6615e-01,  1.4651e-01,  2.4700e-01,
         4.4130e-02, -1.5069e-02,  9.4834e-02, -1.0000e+09,  4.3746e-02,
         6.0306e-02,  8.6283e-02,  8.6060e-01,  3.2138e-01, -2.1756e-03,
         7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02,
        -2.1756e-03,  7.7928e-02,  7.7928e-02,  7.7928e-02, -2.1756e-03,
         7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02,
        -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,
         7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02,
        -2.1756e-03,  7.7928e-02

Tokens embeddings:
tensor([ 3.3173e-02,  2.5993e-01,  4.4977e-01,  2.9773e-01,  7.1086e-02,
        -2.0813e-02,  1.0382e-01,  3.7179e-02,  5.0742e-02,  9.4834e-02,
         2.9773e-01, -9.2530e-02,  7.6094e-03,  8.4558e-02,  2.9773e-01,
         1.4050e-01,  8.6283e-02,  1.0056e-02,  4.3060e-02, -1.4160e-02,
         4.8553e-02, -2.1518e-03,  4.4021e-02,  5.0068e-01,  8.2257e-02,
        -2.1518e-03,  3.1495e-02,  2.6615e-01,  1.4651e-01,  2.4700e-01,
         4.4130e-02, -1.5069e-02,  9.4834e-02, -1.0000e+09,  4.3746e-02,
         6.0306e-02,  8.6283e-02,  8.6060e-01,  3.2138e-01, -2.1756e-03,
         7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02,
        -2.1756e-03,  7.7928e-02,  7.7928e-02,  7.7928e-02, -2.1756e-03,
         7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02,
        -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,
         7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02,
        -2.1756e-03,  7.7928e-02

Tokens embeddings:
tensor([ 3.3173e-02,  2.5993e-01,  4.4977e-01,  2.9773e-01,  7.1086e-02,
        -2.0813e-02,  1.0382e-01,  3.7179e-02,  5.0742e-02,  9.4834e-02,
         2.9773e-01, -9.2530e-02,  7.6094e-03,  8.4558e-02,  2.9773e-01,
         1.4050e-01,  8.6283e-02,  1.0056e-02,  4.3060e-02, -1.4160e-02,
         4.8553e-02, -2.1518e-03,  4.4021e-02,  5.0068e-01,  8.2257e-02,
        -2.1518e-03,  3.1495e-02,  2.6615e-01,  1.4651e-01,  2.4700e-01,
         4.4130e-02, -1.5069e-02,  9.4834e-02, -1.0000e+09,  4.3746e-02,
         6.0306e-02,  8.6283e-02,  8.6060e-01,  3.2138e-01, -2.1756e-03,
         7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02,
        -2.1756e-03,  7.7928e-02,  7.7928e-02,  7.7928e-02, -2.1756e-03,
         7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02,
        -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,
         7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02,
        -2.1756e-03,  7.7928e-02

Tokens embeddings:
tensor([ 3.3173e-02,  2.5993e-01,  4.4977e-01,  2.9773e-01,  7.1086e-02,
        -2.0813e-02,  1.0382e-01,  3.7179e-02,  5.0742e-02,  9.4834e-02,
         2.9773e-01, -9.2530e-02,  7.6094e-03,  8.4558e-02,  2.9773e-01,
         1.4050e-01,  8.6283e-02,  1.0056e-02,  4.3060e-02, -1.4160e-02,
         4.8553e-02, -2.1518e-03,  4.4021e-02,  5.0068e-01,  8.2257e-02,
        -2.1518e-03,  3.1495e-02,  2.6615e-01,  1.4651e-01,  2.4700e-01,
         4.4130e-02, -1.5069e-02,  9.4834e-02, -1.0000e+09,  4.3746e-02,
         6.0306e-02,  8.6283e-02,  8.6060e-01,  3.2138e-01, -2.1756e-03,
         7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02,
        -2.1756e-03,  7.7928e-02,  7.7928e-02,  7.7928e-02, -2.1756e-03,
         7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02,
        -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,
         7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02,
        -2.1756e-03,  7.7928e-02

Tokens embeddings:
tensor([ 3.3173e-02,  2.5993e-01,  4.4977e-01,  2.9773e-01,  7.1086e-02,
        -2.0813e-02,  1.0382e-01,  3.7179e-02,  5.0742e-02,  9.4834e-02,
         2.9773e-01, -9.2530e-02,  7.6094e-03,  8.4558e-02,  2.9773e-01,
         1.4050e-01,  8.6283e-02,  1.0056e-02,  4.3060e-02, -1.4160e-02,
         4.8553e-02, -2.1518e-03,  4.4021e-02,  5.0068e-01,  8.2257e-02,
        -2.1518e-03,  3.1495e-02,  2.6615e-01,  1.4651e-01,  2.4700e-01,
         4.4130e-02, -1.5069e-02,  9.4834e-02, -1.0000e+09,  4.3746e-02,
         6.0306e-02,  8.6283e-02,  8.6060e-01,  3.2138e-01, -2.1756e-03,
         7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02,
        -2.1756e-03,  7.7928e-02,  7.7928e-02,  7.7928e-02, -2.1756e-03,
         7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02,
        -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,
         7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02,
        -2.1756e-03,  7.7928e-02

Tokens embeddings:
tensor([ 3.3173e-02,  2.5993e-01,  4.4977e-01,  2.9773e-01,  7.1086e-02,
        -2.0813e-02,  1.0382e-01,  3.7179e-02,  5.0742e-02,  9.4834e-02,
         2.9773e-01, -9.2530e-02,  7.6094e-03,  8.4558e-02,  2.9773e-01,
         1.4050e-01,  8.6283e-02,  1.0056e-02,  4.3060e-02, -1.4160e-02,
         4.8553e-02, -2.1518e-03,  4.4021e-02,  5.0068e-01,  8.2257e-02,
        -2.1518e-03,  3.1495e-02,  2.6615e-01,  1.4651e-01,  2.4700e-01,
         4.4130e-02, -1.5069e-02,  9.4834e-02, -1.0000e+09,  4.3746e-02,
         6.0306e-02,  8.6283e-02,  8.6060e-01,  3.2138e-01, -2.1756e-03,
         7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02,
        -2.1756e-03,  7.7928e-02,  7.7928e-02,  7.7928e-02, -2.1756e-03,
         7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02,
        -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,
         7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02,
        -2.1756e-03,  7.7928e-02

Tokens embeddings:
tensor([ 3.3173e-02,  2.5993e-01,  4.4977e-01,  2.9773e-01,  7.1086e-02,
        -2.0813e-02,  1.0382e-01,  3.7179e-02,  5.0742e-02,  9.4834e-02,
         2.9773e-01, -9.2530e-02,  7.6094e-03,  8.4558e-02,  2.9773e-01,
         1.4050e-01,  8.6283e-02,  1.0056e-02,  4.3060e-02, -1.4160e-02,
         4.8553e-02, -2.1518e-03,  4.4021e-02,  5.0068e-01,  8.2257e-02,
        -2.1518e-03,  3.1495e-02,  2.6615e-01,  1.4651e-01,  2.4700e-01,
         4.4130e-02, -1.5069e-02,  9.4834e-02, -1.0000e+09,  4.3746e-02,
         6.0306e-02,  8.6283e-02,  8.6060e-01,  3.2138e-01, -2.1756e-03,
         7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02,
        -2.1756e-03,  7.7928e-02,  7.7928e-02,  7.7928e-02, -2.1756e-03,
         7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02,
        -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,
         7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02,
        -2.1756e-03,  7.7928e-02

Tokens embeddings:
tensor([ 3.3173e-02,  2.5993e-01,  4.4977e-01,  2.9773e-01,  7.1086e-02,
        -2.0813e-02,  1.0382e-01,  3.7179e-02,  5.0742e-02,  9.4834e-02,
         2.9773e-01, -9.2530e-02,  7.6094e-03,  8.4558e-02,  2.9773e-01,
         1.4050e-01,  8.6283e-02,  1.0056e-02,  4.3060e-02, -1.4160e-02,
         4.8553e-02, -2.1518e-03,  4.4021e-02,  5.0068e-01,  8.2257e-02,
        -2.1518e-03,  3.1495e-02,  2.6615e-01,  1.4651e-01,  2.4700e-01,
         4.4130e-02, -1.5069e-02,  9.4834e-02, -1.0000e+09,  4.3746e-02,
         6.0306e-02,  8.6283e-02,  8.6060e-01,  3.2138e-01, -2.1756e-03,
         7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02,
        -2.1756e-03,  7.7928e-02,  7.7928e-02,  7.7928e-02, -2.1756e-03,
         7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02,
        -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,
         7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02,
        -2.1756e-03,  7.7928e-02

Tokens embeddings:
tensor([ 3.3173e-02,  2.5993e-01,  4.4977e-01,  2.9773e-01,  7.1086e-02,
        -2.0813e-02,  1.0382e-01,  3.7179e-02,  5.0742e-02,  9.4834e-02,
         2.9773e-01, -9.2530e-02,  7.6094e-03,  8.4558e-02,  2.9773e-01,
         1.4050e-01,  8.6283e-02,  1.0056e-02,  4.3060e-02, -1.4160e-02,
         4.8553e-02, -2.1518e-03,  4.4021e-02,  5.0068e-01,  8.2257e-02,
        -2.1518e-03,  3.1495e-02,  2.6615e-01,  1.4651e-01,  2.4700e-01,
         4.4130e-02, -1.5069e-02,  9.4834e-02, -1.0000e+09,  4.3746e-02,
         6.0306e-02,  8.6283e-02,  8.6060e-01,  3.2138e-01, -2.1756e-03,
         7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02,
        -2.1756e-03,  7.7928e-02,  7.7928e-02,  7.7928e-02, -2.1756e-03,
         7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02,
        -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,
         7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02,
        -2.1756e-03,  7.7928e-02

Tokens embeddings:
tensor([ 3.3173e-02,  2.5993e-01,  4.4977e-01,  2.9773e-01,  7.1086e-02,
        -2.0813e-02,  1.0382e-01,  3.7179e-02,  5.0742e-02,  9.4834e-02,
         2.9773e-01, -9.2530e-02,  7.6094e-03,  8.4558e-02,  2.9773e-01,
         1.4050e-01,  8.6283e-02,  1.0056e-02,  4.3060e-02, -1.4160e-02,
         4.8553e-02, -2.1518e-03,  4.4021e-02,  5.0068e-01,  8.2257e-02,
        -2.1518e-03,  3.1495e-02,  2.6615e-01,  1.4651e-01,  2.4700e-01,
         4.4130e-02, -1.5069e-02,  9.4834e-02, -1.0000e+09,  4.3746e-02,
         6.0306e-02,  8.6283e-02,  8.6060e-01,  3.2138e-01, -2.1756e-03,
         7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02,
        -2.1756e-03,  7.7928e-02,  7.7928e-02,  7.7928e-02, -2.1756e-03,
         7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02,
        -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,
         7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02,
        -2.1756e-03,  7.7928e-02

Tokens embeddings:
tensor([ 3.3173e-02,  1.7750e-02,  7.4659e-02,  8.6060e-01, -1.1947e-02,
        -2.5002e-02,  5.0742e-02,  8.6283e-02,  3.6311e-02,  2.4700e-01,
         2.4805e-02,  5.6638e-01,  8.2257e-02, -2.1518e-03,  3.9759e-02,
        -1.0000e+09,  2.9773e-01,  2.9939e-01,  8.8600e-02, -1.9156e-03,
         9.8456e-02,  1.7805e-01,  1.4613e-02, -2.7676e-02,  3.6311e-02,
        -3.1070e-02,  1.7152e-02,  8.6060e-01,  5.0448e-02,  8.2359e-03,
         8.3693e-02, -2.1527e-02,  2.3273e-01, -2.1518e-03,  7.1574e-02,
         4.4977e-01,  2.4700e-01, -6.2928e-02,  9.4834e-02,  2.4783e-01,
         4.1332e-02,  4.3060e-02,  1.5625e-01,  2.3763e-02,  2.6615e-01,
         4.0675e-02,  2.4700e-01, -1.0000e+09,  4.4977e-01,  1.8822e-02,
         5.0742e-02,  2.7198e-01,  2.4700e-01,  3.8493e-01,  2.4864e-02,
         9.4834e-02,  5.2006e-02, -2.7676e-02,  8.6060e-01,  3.2138e-01,
         7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02,
        -2.1756e-03,  7.7928e-02

Tokens embeddings:
tensor([ 3.3173e-02,  1.7750e-02,  7.4659e-02,  8.6060e-01, -1.1947e-02,
        -2.5002e-02,  5.0742e-02,  8.6283e-02,  3.6311e-02,  2.4700e-01,
         2.4805e-02,  5.6638e-01,  8.2257e-02, -2.1518e-03,  3.9759e-02,
        -1.0000e+09,  2.9773e-01,  2.9939e-01,  8.8600e-02, -1.9156e-03,
         9.8456e-02,  1.7805e-01,  1.4613e-02, -2.7676e-02,  3.6311e-02,
        -3.1070e-02,  1.7152e-02,  8.6060e-01,  5.0448e-02,  8.2359e-03,
         8.3693e-02, -2.1527e-02,  2.3273e-01, -2.1518e-03,  7.1574e-02,
         4.4977e-01,  2.4700e-01, -6.2928e-02,  9.4834e-02,  2.4783e-01,
         4.1332e-02,  4.3060e-02,  1.5625e-01,  2.3763e-02,  2.6615e-01,
         4.0675e-02,  2.4700e-01, -1.0000e+09,  4.4977e-01,  1.8822e-02,
         5.0742e-02,  2.7198e-01,  2.4700e-01,  3.8493e-01,  2.4864e-02,
         9.4834e-02,  5.2006e-02, -2.7676e-02,  8.6060e-01,  3.2138e-01,
         7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02,
        -2.1756e-03,  7.7928e-02

Tokens embeddings:
tensor([ 3.3173e-02,  1.7750e-02,  7.4659e-02,  8.6060e-01, -1.1947e-02,
        -2.5002e-02,  5.0742e-02,  8.6283e-02,  3.6311e-02,  2.4700e-01,
         2.4805e-02,  5.6638e-01,  8.2257e-02, -2.1518e-03,  3.9759e-02,
        -1.0000e+09,  2.9773e-01,  2.9939e-01,  8.8600e-02, -1.9156e-03,
         9.8456e-02,  1.7805e-01,  1.4613e-02, -2.7676e-02,  3.6311e-02,
        -3.1070e-02,  1.7152e-02,  8.6060e-01,  5.0448e-02,  8.2359e-03,
         8.3693e-02, -2.1527e-02,  2.3273e-01, -2.1518e-03,  7.1574e-02,
         4.4977e-01,  2.4700e-01, -6.2928e-02,  9.4834e-02,  2.4783e-01,
         4.1332e-02,  4.3060e-02,  1.5625e-01,  2.3763e-02,  2.6615e-01,
         4.0675e-02,  2.4700e-01, -1.0000e+09,  4.4977e-01,  1.8822e-02,
         5.0742e-02,  2.7198e-01,  2.4700e-01,  3.8493e-01,  2.4864e-02,
         9.4834e-02,  5.2006e-02, -2.7676e-02,  8.6060e-01,  3.2138e-01,
         7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02,
        -2.1756e-03,  7.7928e-02

Tokens embeddings:
tensor([ 3.3173e-02,  1.7750e-02,  7.4659e-02,  8.6060e-01, -1.1947e-02,
        -2.5002e-02,  5.0742e-02,  8.6283e-02,  3.6311e-02,  2.4700e-01,
         2.4805e-02,  5.6638e-01,  8.2257e-02, -2.1518e-03,  3.9759e-02,
        -1.0000e+09,  2.9773e-01,  2.9939e-01,  8.8600e-02, -1.9156e-03,
         9.8456e-02,  1.7805e-01,  1.4613e-02, -2.7676e-02,  3.6311e-02,
        -3.1070e-02,  1.7152e-02,  8.6060e-01,  5.0448e-02,  8.2359e-03,
         8.3693e-02, -2.1527e-02,  2.3273e-01, -2.1518e-03,  7.1574e-02,
         4.4977e-01,  2.4700e-01, -6.2928e-02,  9.4834e-02,  2.4783e-01,
         4.1332e-02,  4.3060e-02,  1.5625e-01,  2.3763e-02,  2.6615e-01,
         4.0675e-02,  2.4700e-01, -1.0000e+09,  4.4977e-01,  1.8822e-02,
         5.0742e-02,  2.7198e-01,  2.4700e-01,  3.8493e-01,  2.4864e-02,
         9.4834e-02,  5.2006e-02, -2.7676e-02,  8.6060e-01,  3.2138e-01,
         7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02,
        -2.1756e-03,  7.7928e-02

Tokens embeddings:
tensor([ 3.3173e-02,  1.7750e-02,  7.4659e-02,  8.6060e-01, -1.1947e-02,
        -2.5002e-02,  5.0742e-02,  8.6283e-02,  3.6311e-02,  2.4700e-01,
         2.4805e-02,  5.6638e-01,  8.2257e-02, -2.1518e-03,  3.9759e-02,
        -1.0000e+09,  2.9773e-01,  2.9939e-01,  8.8600e-02, -1.9156e-03,
         9.8456e-02,  1.7805e-01,  1.4613e-02, -2.7676e-02,  3.6311e-02,
        -3.1070e-02,  1.7152e-02,  8.6060e-01,  5.0448e-02,  8.2359e-03,
         8.3693e-02, -2.1527e-02,  2.3273e-01, -2.1518e-03,  7.1574e-02,
         4.4977e-01,  2.4700e-01, -6.2928e-02,  9.4834e-02,  2.4783e-01,
         4.1332e-02,  4.3060e-02,  1.5625e-01,  2.3763e-02,  2.6615e-01,
         4.0675e-02,  2.4700e-01, -1.0000e+09,  4.4977e-01,  1.8822e-02,
         5.0742e-02,  2.7198e-01,  2.4700e-01,  3.8493e-01,  2.4864e-02,
         9.4834e-02,  5.2006e-02, -2.7676e-02,  8.6060e-01,  3.2138e-01,
         7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02,
        -2.1756e-03,  7.7928e-02

Tokens embeddings:
tensor([ 3.3173e-02,  1.7750e-02,  7.4659e-02,  8.6060e-01, -1.1947e-02,
        -2.5002e-02,  5.0742e-02,  8.6283e-02,  3.6311e-02,  2.4700e-01,
         2.4805e-02,  5.6638e-01,  8.2257e-02, -2.1518e-03,  3.9759e-02,
        -1.0000e+09,  2.9773e-01,  2.9939e-01,  8.8600e-02, -1.9156e-03,
         9.8456e-02,  1.7805e-01,  1.4613e-02, -2.7676e-02,  3.6311e-02,
        -3.1070e-02,  1.7152e-02,  8.6060e-01,  5.0448e-02,  8.2359e-03,
         8.3693e-02, -2.1527e-02,  2.3273e-01, -2.1518e-03,  7.1574e-02,
         4.4977e-01,  2.4700e-01, -6.2928e-02,  9.4834e-02,  2.4783e-01,
         4.1332e-02,  4.3060e-02,  1.5625e-01,  2.3763e-02,  2.6615e-01,
         4.0675e-02,  2.4700e-01, -1.0000e+09,  4.4977e-01,  1.8822e-02,
         5.0742e-02,  2.7198e-01,  2.4700e-01,  3.8493e-01,  2.4864e-02,
         9.4834e-02,  5.2006e-02, -2.7676e-02,  8.6060e-01,  3.2138e-01,
         7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02,
        -2.1756e-03,  7.7928e-02

Tokens embeddings:
tensor([ 3.3173e-02,  1.7750e-02,  7.4659e-02,  8.6060e-01, -1.1947e-02,
        -2.5002e-02,  5.0742e-02,  8.6283e-02,  3.6311e-02,  2.4700e-01,
         2.4805e-02,  5.6638e-01,  8.2257e-02, -2.1518e-03,  3.9759e-02,
        -1.0000e+09,  2.9773e-01,  2.9939e-01,  8.8600e-02, -1.9156e-03,
         9.8456e-02,  1.7805e-01,  1.4613e-02, -2.7676e-02,  3.6311e-02,
        -3.1070e-02,  1.7152e-02,  8.6060e-01,  5.0448e-02,  8.2359e-03,
         8.3693e-02, -2.1527e-02,  2.3273e-01, -2.1518e-03,  7.1574e-02,
         4.4977e-01,  2.4700e-01, -6.2928e-02,  9.4834e-02,  2.4783e-01,
         4.1332e-02,  4.3060e-02,  1.5625e-01,  2.3763e-02,  2.6615e-01,
         4.0675e-02,  2.4700e-01, -1.0000e+09,  4.4977e-01,  1.8822e-02,
         5.0742e-02,  2.7198e-01,  2.4700e-01,  3.8493e-01,  2.4864e-02,
         9.4834e-02,  5.2006e-02, -2.7676e-02,  8.6060e-01,  3.2138e-01,
         7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02,
        -2.1756e-03,  7.7928e-02

Tokens embeddings:
tensor([ 3.3173e-02,  1.7750e-02,  7.4659e-02,  8.6060e-01, -1.1947e-02,
        -2.5002e-02,  5.0742e-02,  8.6283e-02,  3.6311e-02,  2.4700e-01,
         2.4805e-02,  5.6638e-01,  8.2257e-02, -2.1518e-03,  3.9759e-02,
        -1.0000e+09,  2.9773e-01,  2.9939e-01,  8.8600e-02, -1.9156e-03,
         9.8456e-02,  1.7805e-01,  1.4613e-02, -2.7676e-02,  3.6311e-02,
        -3.1070e-02,  1.7152e-02,  8.6060e-01,  5.0448e-02,  8.2359e-03,
         8.3693e-02, -2.1527e-02,  2.3273e-01, -2.1518e-03,  7.1574e-02,
         4.4977e-01,  2.4700e-01, -6.2928e-02,  9.4834e-02,  2.4783e-01,
         4.1332e-02,  4.3060e-02,  1.5625e-01,  2.3763e-02,  2.6615e-01,
         4.0675e-02,  2.4700e-01, -1.0000e+09,  4.4977e-01,  1.8822e-02,
         5.0742e-02,  2.7198e-01,  2.4700e-01,  3.8493e-01,  2.4864e-02,
         9.4834e-02,  5.2006e-02, -2.7676e-02,  8.6060e-01,  3.2138e-01,
         7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02,
        -2.1756e-03,  7.7928e-02

Tokens embeddings:
tensor([ 3.3173e-02,  1.7750e-02,  7.4659e-02,  8.6060e-01, -1.1947e-02,
        -2.5002e-02,  5.0742e-02,  8.6283e-02,  3.6311e-02,  2.4700e-01,
         2.4805e-02,  5.6638e-01,  8.2257e-02, -2.1518e-03,  3.9759e-02,
        -1.0000e+09,  2.9773e-01,  2.9939e-01,  8.8600e-02, -1.9156e-03,
         9.8456e-02,  1.7805e-01,  1.4613e-02, -2.7676e-02,  3.6311e-02,
        -3.1070e-02,  1.7152e-02,  8.6060e-01,  5.0448e-02,  8.2359e-03,
         8.3693e-02, -2.1527e-02,  2.3273e-01, -2.1518e-03,  7.1574e-02,
         4.4977e-01,  2.4700e-01, -6.2928e-02,  9.4834e-02,  2.4783e-01,
         4.1332e-02,  4.3060e-02,  1.5625e-01,  2.3763e-02,  2.6615e-01,
         4.0675e-02,  2.4700e-01, -1.0000e+09,  4.4977e-01,  1.8822e-02,
         5.0742e-02,  2.7198e-01,  2.4700e-01,  3.8493e-01,  2.4864e-02,
         9.4834e-02,  5.2006e-02, -2.7676e-02,  8.6060e-01,  3.2138e-01,
         7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02,
        -2.1756e-03,  7.7928e-02

Tokens embeddings:
tensor([ 3.3173e-02,  1.7750e-02,  7.4659e-02,  8.6060e-01, -1.1947e-02,
        -2.5002e-02,  5.0742e-02,  8.6283e-02,  3.6311e-02,  2.4700e-01,
         2.4805e-02,  5.6638e-01,  8.2257e-02, -2.1518e-03,  3.9759e-02,
        -1.0000e+09,  2.9773e-01,  2.9939e-01,  8.8600e-02, -1.9156e-03,
         9.8456e-02,  1.7805e-01,  1.4613e-02, -2.7676e-02,  3.6311e-02,
        -3.1070e-02,  1.7152e-02,  8.6060e-01,  5.0448e-02,  8.2359e-03,
         8.3693e-02, -2.1527e-02,  2.3273e-01, -2.1518e-03,  7.1574e-02,
         4.4977e-01,  2.4700e-01, -6.2928e-02,  9.4834e-02,  2.4783e-01,
         4.1332e-02,  4.3060e-02,  1.5625e-01,  2.3763e-02,  2.6615e-01,
         4.0675e-02,  2.4700e-01, -1.0000e+09,  4.4977e-01,  1.8822e-02,
         5.0742e-02,  2.7198e-01,  2.4700e-01,  3.8493e-01,  2.4864e-02,
         9.4834e-02,  5.2006e-02, -2.7676e-02,  8.6060e-01,  3.2138e-01,
         7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02,
        -2.1756e-03,  7.7928e-02

Tokens embeddings:
tensor([ 3.3173e-02,  1.7750e-02,  7.4659e-02,  8.6060e-01, -1.1947e-02,
        -2.5002e-02,  5.0742e-02,  8.6283e-02,  3.6311e-02,  2.4700e-01,
         2.4805e-02,  5.6638e-01,  8.2257e-02, -2.1518e-03,  3.9759e-02,
        -1.0000e+09,  2.9773e-01,  2.9939e-01,  8.8600e-02, -1.9156e-03,
         9.8456e-02,  1.7805e-01,  1.4613e-02, -2.7676e-02,  3.6311e-02,
        -3.1070e-02,  1.7152e-02,  8.6060e-01,  5.0448e-02,  8.2359e-03,
         8.3693e-02, -2.1527e-02,  2.3273e-01, -2.1518e-03,  7.1574e-02,
         4.4977e-01,  2.4700e-01, -6.2928e-02,  9.4834e-02,  2.4783e-01,
         4.1332e-02,  4.3060e-02,  1.5625e-01,  2.3763e-02,  2.6615e-01,
         4.0675e-02,  2.4700e-01, -1.0000e+09,  4.4977e-01,  1.8822e-02,
         5.0742e-02,  2.7198e-01,  2.4700e-01,  3.8493e-01,  2.4864e-02,
         9.4834e-02,  5.2006e-02, -2.7676e-02,  8.6060e-01,  3.2138e-01,
         7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02,
        -2.1756e-03,  7.7928e-02

Tokens embeddings:
tensor([ 3.3173e-02,  1.7750e-02,  7.4659e-02,  8.6060e-01, -1.1947e-02,
        -2.5002e-02,  5.0742e-02,  8.6283e-02,  3.6311e-02,  2.4700e-01,
         2.4805e-02,  5.6638e-01,  8.2257e-02, -2.1518e-03,  3.9759e-02,
        -1.0000e+09,  2.9773e-01,  2.9939e-01,  8.8600e-02, -1.9156e-03,
         9.8456e-02,  1.7805e-01,  1.4613e-02, -2.7676e-02,  3.6311e-02,
        -3.1070e-02,  1.7152e-02,  8.6060e-01,  5.0448e-02,  8.2359e-03,
         8.3693e-02, -2.1527e-02,  2.3273e-01, -2.1518e-03,  7.1574e-02,
         4.4977e-01,  2.4700e-01, -6.2928e-02,  9.4834e-02,  2.4783e-01,
         4.1332e-02,  4.3060e-02,  1.5625e-01,  2.3763e-02,  2.6615e-01,
         4.0675e-02,  2.4700e-01, -1.0000e+09,  4.4977e-01,  1.8822e-02,
         5.0742e-02,  2.7198e-01,  2.4700e-01,  3.8493e-01,  2.4864e-02,
         9.4834e-02,  5.2006e-02, -2.7676e-02,  8.6060e-01,  3.2138e-01,
         7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02,
        -2.1756e-03,  7.7928e-02

Tokens embeddings:
tensor([ 3.3173e-02, -1.9409e-01,  4.2869e-01,  3.0555e-01,  5.9282e-02,
         6.8550e-01,  8.6283e-02, -1.1332e-01,  3.1000e-01,  2.7507e-02,
         1.3483e-01,  1.7075e-01,  6.8972e-02, -1.0000e+09,  5.0742e-02,
         8.6283e-02,  8.6060e-01,  3.2138e-01,  7.7928e-02, -2.1756e-03,
         7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02,
        -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,
         7.7928e-02, -1.0000e+09,  7.7928e-02,  7.7928e-02,  7.7928e-02,
        -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,
         7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02,
        -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,
         7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02,
        -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,
         7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02,
        -2.1756e-03,  7.7928e-02

Tokens embeddings:
tensor([ 3.3173e-02, -1.9409e-01,  4.2869e-01,  3.0555e-01,  5.9282e-02,
         6.8550e-01,  8.6283e-02, -1.1332e-01,  3.1000e-01,  2.7507e-02,
         1.3483e-01,  1.7075e-01,  6.8972e-02, -1.0000e+09,  5.0742e-02,
         8.6283e-02,  8.6060e-01,  3.2138e-01,  7.7928e-02, -2.1756e-03,
         7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02,
        -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,
         7.7928e-02, -1.0000e+09,  7.7928e-02,  7.7928e-02,  7.7928e-02,
        -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,
         7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02,
        -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,
         7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02,
        -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,
         7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02,
        -2.1756e-03,  7.7928e-02

Tokens embeddings:
tensor([ 3.3173e-02, -1.9409e-01,  4.2869e-01,  3.0555e-01,  5.9282e-02,
         6.8550e-01,  8.6283e-02, -1.1332e-01,  3.1000e-01,  2.7507e-02,
         1.3483e-01,  1.7075e-01,  6.8972e-02, -1.0000e+09,  5.0742e-02,
         8.6283e-02,  8.6060e-01,  3.2138e-01,  7.7928e-02, -2.1756e-03,
         7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02,
        -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,
         7.7928e-02, -1.0000e+09,  7.7928e-02,  7.7928e-02,  7.7928e-02,
        -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,
         7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02,
        -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,
         7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02,
        -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,
         7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02,
        -2.1756e-03,  7.7928e-02

Tokens embeddings:
tensor([ 3.3173e-02, -1.9409e-01,  4.2869e-01,  3.0555e-01,  5.9282e-02,
         6.8550e-01,  8.6283e-02, -1.1332e-01,  3.1000e-01,  2.7507e-02,
         1.3483e-01,  1.7075e-01,  6.8972e-02, -1.0000e+09,  5.0742e-02,
         8.6283e-02,  8.6060e-01,  3.2138e-01,  7.7928e-02, -2.1756e-03,
         7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02,
        -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,
         7.7928e-02, -1.0000e+09,  7.7928e-02,  7.7928e-02,  7.7928e-02,
        -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,
         7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02,
        -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,
         7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02,
        -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,
         7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02,
        -2.1756e-03,  7.7928e-02

Tokens embeddings:
tensor([ 3.3173e-02, -1.9409e-01,  4.2869e-01,  3.0555e-01,  5.9282e-02,
         6.8550e-01,  8.6283e-02, -1.1332e-01,  3.1000e-01,  2.7507e-02,
         1.3483e-01,  1.7075e-01,  6.8972e-02, -1.0000e+09,  5.0742e-02,
         8.6283e-02,  8.6060e-01,  3.2138e-01,  7.7928e-02, -2.1756e-03,
         7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02,
        -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,
         7.7928e-02, -1.0000e+09,  7.7928e-02,  7.7928e-02,  7.7928e-02,
        -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,
         7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02,
        -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,
         7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02,
        -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,
         7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02,
        -2.1756e-03,  7.7928e-02

Tokens embeddings:
tensor([ 3.3173e-02, -1.9409e-01,  4.2869e-01,  3.0555e-01,  5.9282e-02,
         6.8550e-01,  8.6283e-02, -1.1332e-01,  3.1000e-01,  2.7507e-02,
         1.3483e-01,  1.7075e-01,  6.8972e-02, -1.0000e+09,  5.0742e-02,
         8.6283e-02,  8.6060e-01,  3.2138e-01,  7.7928e-02, -2.1756e-03,
         7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02,
        -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,
         7.7928e-02, -1.0000e+09,  7.7928e-02,  7.7928e-02,  7.7928e-02,
        -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,
         7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02,
        -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,
         7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02,
        -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,
         7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02,
        -2.1756e-03,  7.7928e-02

Tokens embeddings:
tensor([ 3.3173e-02, -1.9409e-01,  4.2869e-01,  3.0555e-01,  5.9282e-02,
         6.8550e-01,  8.6283e-02, -1.1332e-01,  3.1000e-01,  2.7507e-02,
         1.3483e-01,  1.7075e-01,  6.8972e-02, -1.0000e+09,  5.0742e-02,
         8.6283e-02,  8.6060e-01,  3.2138e-01,  7.7928e-02, -2.1756e-03,
         7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02,
        -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,
         7.7928e-02, -1.0000e+09,  7.7928e-02,  7.7928e-02,  7.7928e-02,
        -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,
         7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02,
        -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,
         7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02,
        -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,
         7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02,
        -2.1756e-03,  7.7928e-02

Tokens embeddings:
tensor([ 3.3173e-02, -1.9409e-01,  4.2869e-01,  3.0555e-01,  5.9282e-02,
         6.8550e-01,  8.6283e-02, -1.1332e-01,  3.1000e-01,  2.7507e-02,
         1.3483e-01,  1.7075e-01,  6.8972e-02, -1.0000e+09,  5.0742e-02,
         8.6283e-02,  8.6060e-01,  3.2138e-01,  7.7928e-02, -2.1756e-03,
         7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02,
        -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,
         7.7928e-02, -1.0000e+09,  7.7928e-02,  7.7928e-02,  7.7928e-02,
        -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,
         7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02,
        -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,
         7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02,
        -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,
         7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02,
        -2.1756e-03,  7.7928e-02

Tokens embeddings:
tensor([ 3.3173e-02, -1.9409e-01,  4.2869e-01,  3.0555e-01,  5.9282e-02,
         6.8550e-01,  8.6283e-02, -1.1332e-01,  3.1000e-01,  2.7507e-02,
         1.3483e-01,  1.7075e-01,  6.8972e-02, -1.0000e+09,  5.0742e-02,
         8.6283e-02,  8.6060e-01,  3.2138e-01,  7.7928e-02, -2.1756e-03,
         7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02,
        -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,
         7.7928e-02, -1.0000e+09,  7.7928e-02,  7.7928e-02,  7.7928e-02,
        -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,
         7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02,
        -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,
         7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02,
        -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,
         7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02,
        -2.1756e-03,  7.7928e-02

Tokens embeddings:
tensor([ 3.3173e-02, -1.9409e-01,  4.2869e-01,  3.0555e-01,  5.9282e-02,
         6.8550e-01,  8.6283e-02, -1.1332e-01,  3.1000e-01,  2.7507e-02,
         1.3483e-01,  1.7075e-01,  6.8972e-02, -1.0000e+09,  5.0742e-02,
         8.6283e-02,  8.6060e-01,  3.2138e-01,  7.7928e-02, -2.1756e-03,
         7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02,
        -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,
         7.7928e-02, -1.0000e+09,  7.7928e-02,  7.7928e-02,  7.7928e-02,
        -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,
         7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02,
        -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,
         7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02,
        -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,
         7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02,
        -2.1756e-03,  7.7928e-02

Tokens embeddings:
tensor([ 3.3173e-02, -1.9409e-01,  4.2869e-01,  3.0555e-01,  5.9282e-02,
         6.8550e-01,  8.6283e-02, -1.1332e-01,  3.1000e-01,  2.7507e-02,
         1.3483e-01,  1.7075e-01,  6.8972e-02, -1.0000e+09,  5.0742e-02,
         8.6283e-02,  8.6060e-01,  3.2138e-01,  7.7928e-02, -2.1756e-03,
         7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02,
        -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,
         7.7928e-02, -1.0000e+09,  7.7928e-02,  7.7928e-02,  7.7928e-02,
        -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,
         7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02,
        -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,
         7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02,
        -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,
         7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02,
        -2.1756e-03,  7.7928e-02

Tokens embeddings:
tensor([ 3.3173e-02, -1.9409e-01,  4.2869e-01,  3.0555e-01,  5.9282e-02,
         6.8550e-01,  8.6283e-02, -1.1332e-01,  3.1000e-01,  2.7507e-02,
         1.3483e-01,  1.7075e-01,  6.8972e-02, -1.0000e+09,  5.0742e-02,
         8.6283e-02,  8.6060e-01,  3.2138e-01,  7.7928e-02, -2.1756e-03,
         7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02,
        -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,
         7.7928e-02, -1.0000e+09,  7.7928e-02,  7.7928e-02,  7.7928e-02,
        -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,
         7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02,
        -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,
         7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02,
        -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,
         7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02,
        -2.1756e-03,  7.7928e-02

Tokens embeddings:
tensor([ 3.3173e-02,  1.8188e-02,  5.0742e-02, -5.5578e-03,  8.2697e-02,
        -2.0249e-02,  8.3316e-02,  2.9773e-01,  1.2986e-01, -7.5436e-02,
         6.3056e-02, -9.8879e-03,  1.3483e-01,  6.7849e-03,  1.5339e-01,
         1.2945e-01,  1.7281e-01, -9.8375e-03,  5.0742e-02, -2.7676e-02,
         8.6060e-01,  3.2138e-01,  7.7928e-02, -2.1756e-03,  7.7928e-02,
        -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,
         7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02,
        -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02, -1.0000e+09,
         7.7928e-02,  7.7928e-02,  7.7928e-02, -2.1756e-03,  7.7928e-02,
        -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,
         7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02,
        -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,
         7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02,
        -2.1756e-03,  7.7928e-02

Tokens embeddings:
tensor([ 3.3173e-02,  1.8188e-02,  5.0742e-02, -5.5578e-03,  8.2697e-02,
        -2.0249e-02,  8.3316e-02,  2.9773e-01,  1.2986e-01, -7.5436e-02,
         6.3056e-02, -9.8879e-03,  1.3483e-01,  6.7849e-03,  1.5339e-01,
         1.2945e-01,  1.7281e-01, -9.8375e-03,  5.0742e-02, -2.7676e-02,
         8.6060e-01,  3.2138e-01,  7.7928e-02, -2.1756e-03,  7.7928e-02,
        -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,
         7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02,
        -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02, -1.0000e+09,
         7.7928e-02,  7.7928e-02,  7.7928e-02, -2.1756e-03,  7.7928e-02,
        -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,
         7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02,
        -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,
         7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02,
        -2.1756e-03,  7.7928e-02

Tokens embeddings:
tensor([ 3.3173e-02,  1.8188e-02,  5.0742e-02, -5.5578e-03,  8.2697e-02,
        -2.0249e-02,  8.3316e-02,  2.9773e-01,  1.2986e-01, -7.5436e-02,
         6.3056e-02, -9.8879e-03,  1.3483e-01,  6.7849e-03,  1.5339e-01,
         1.2945e-01,  1.7281e-01, -9.8375e-03,  5.0742e-02, -2.7676e-02,
         8.6060e-01,  3.2138e-01,  7.7928e-02, -2.1756e-03,  7.7928e-02,
        -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,
         7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02,
        -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02, -1.0000e+09,
         7.7928e-02,  7.7928e-02,  7.7928e-02, -2.1756e-03,  7.7928e-02,
        -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,
         7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02,
        -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,
         7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02,
        -2.1756e-03,  7.7928e-02

Tokens embeddings:
tensor([ 3.3173e-02,  1.8188e-02,  5.0742e-02, -5.5578e-03,  8.2697e-02,
        -2.0249e-02,  8.3316e-02,  2.9773e-01,  1.2986e-01, -7.5436e-02,
         6.3056e-02, -9.8879e-03,  1.3483e-01,  6.7849e-03,  1.5339e-01,
         1.2945e-01,  1.7281e-01, -9.8375e-03,  5.0742e-02, -2.7676e-02,
         8.6060e-01,  3.2138e-01,  7.7928e-02, -2.1756e-03,  7.7928e-02,
        -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,
         7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02,
        -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02, -1.0000e+09,
         7.7928e-02,  7.7928e-02,  7.7928e-02, -2.1756e-03,  7.7928e-02,
        -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,
         7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02,
        -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,
         7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02,
        -2.1756e-03,  7.7928e-02

Tokens embeddings:
tensor([ 3.3173e-02,  1.8188e-02,  5.0742e-02, -5.5578e-03,  8.2697e-02,
        -2.0249e-02,  8.3316e-02,  2.9773e-01,  1.2986e-01, -7.5436e-02,
         6.3056e-02, -9.8879e-03,  1.3483e-01,  6.7849e-03,  1.5339e-01,
         1.2945e-01,  1.7281e-01, -9.8375e-03,  5.0742e-02, -2.7676e-02,
         8.6060e-01,  3.2138e-01,  7.7928e-02, -2.1756e-03,  7.7928e-02,
        -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,
         7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02,
        -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02, -1.0000e+09,
         7.7928e-02,  7.7928e-02,  7.7928e-02, -2.1756e-03,  7.7928e-02,
        -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,
         7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02,
        -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,
         7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02,
        -2.1756e-03,  7.7928e-02

Tokens embeddings:
tensor([ 3.3173e-02,  1.8188e-02,  5.0742e-02, -5.5578e-03,  8.2697e-02,
        -2.0249e-02,  8.3316e-02,  2.9773e-01,  1.2986e-01, -7.5436e-02,
         6.3056e-02, -9.8879e-03,  1.3483e-01,  6.7849e-03,  1.5339e-01,
         1.2945e-01,  1.7281e-01, -9.8375e-03,  5.0742e-02, -2.7676e-02,
         8.6060e-01,  3.2138e-01,  7.7928e-02, -2.1756e-03,  7.7928e-02,
        -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,
         7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02,
        -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02, -1.0000e+09,
         7.7928e-02,  7.7928e-02,  7.7928e-02, -2.1756e-03,  7.7928e-02,
        -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,
         7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02,
        -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,
         7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02,
        -2.1756e-03,  7.7928e-02

Tokens embeddings:
tensor([ 3.3173e-02,  1.8188e-02,  5.0742e-02, -5.5578e-03,  8.2697e-02,
        -2.0249e-02,  8.3316e-02,  2.9773e-01,  1.2986e-01, -7.5436e-02,
         6.3056e-02, -9.8879e-03,  1.3483e-01,  6.7849e-03,  1.5339e-01,
         1.2945e-01,  1.7281e-01, -9.8375e-03,  5.0742e-02, -2.7676e-02,
         8.6060e-01,  3.2138e-01,  7.7928e-02, -2.1756e-03,  7.7928e-02,
        -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,
         7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02,
        -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02, -1.0000e+09,
         7.7928e-02,  7.7928e-02,  7.7928e-02, -2.1756e-03,  7.7928e-02,
        -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,
         7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02,
        -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,
         7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02,
        -2.1756e-03,  7.7928e-02

Tokens embeddings:
tensor([ 3.3173e-02,  1.8188e-02,  5.0742e-02, -5.5578e-03,  8.2697e-02,
        -2.0249e-02,  8.3316e-02,  2.9773e-01,  1.2986e-01, -7.5436e-02,
         6.3056e-02, -9.8879e-03,  1.3483e-01,  6.7849e-03,  1.5339e-01,
         1.2945e-01,  1.7281e-01, -9.8375e-03,  5.0742e-02, -2.7676e-02,
         8.6060e-01,  3.2138e-01,  7.7928e-02, -2.1756e-03,  7.7928e-02,
        -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,
         7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02,
        -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02, -1.0000e+09,
         7.7928e-02,  7.7928e-02,  7.7928e-02, -2.1756e-03,  7.7928e-02,
        -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,
         7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02,
        -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,
         7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02,
        -2.1756e-03,  7.7928e-02

Tokens embeddings:
tensor([ 3.3173e-02,  1.8188e-02,  5.0742e-02, -5.5578e-03,  8.2697e-02,
        -2.0249e-02,  8.3316e-02,  2.9773e-01,  1.2986e-01, -7.5436e-02,
         6.3056e-02, -9.8879e-03,  1.3483e-01,  6.7849e-03,  1.5339e-01,
         1.2945e-01,  1.7281e-01, -9.8375e-03,  5.0742e-02, -2.7676e-02,
         8.6060e-01,  3.2138e-01,  7.7928e-02, -2.1756e-03,  7.7928e-02,
        -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,
         7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02,
        -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02, -1.0000e+09,
         7.7928e-02,  7.7928e-02,  7.7928e-02, -2.1756e-03,  7.7928e-02,
        -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,
         7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02,
        -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,
         7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02,
        -2.1756e-03,  7.7928e-02

Tokens embeddings:
tensor([ 3.3173e-02,  1.8188e-02,  5.0742e-02, -5.5578e-03,  8.2697e-02,
        -2.0249e-02,  8.3316e-02,  2.9773e-01,  1.2986e-01, -7.5436e-02,
         6.3056e-02, -9.8879e-03,  1.3483e-01,  6.7849e-03,  1.5339e-01,
         1.2945e-01,  1.7281e-01, -9.8375e-03,  5.0742e-02, -2.7676e-02,
         8.6060e-01,  3.2138e-01,  7.7928e-02, -2.1756e-03,  7.7928e-02,
        -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,
         7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02,
        -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02, -1.0000e+09,
         7.7928e-02,  7.7928e-02,  7.7928e-02, -2.1756e-03,  7.7928e-02,
        -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,
         7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02,
        -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,
         7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02,
        -2.1756e-03,  7.7928e-02

Tokens embeddings:
tensor([ 3.3173e-02,  1.8188e-02,  5.0742e-02, -5.5578e-03,  8.2697e-02,
        -2.0249e-02,  8.3316e-02,  2.9773e-01,  1.2986e-01, -7.5436e-02,
         6.3056e-02, -9.8879e-03,  1.3483e-01,  6.7849e-03,  1.5339e-01,
         1.2945e-01,  1.7281e-01, -9.8375e-03,  5.0742e-02, -2.7676e-02,
         8.6060e-01,  3.2138e-01,  7.7928e-02, -2.1756e-03,  7.7928e-02,
        -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,
         7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02,
        -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02, -1.0000e+09,
         7.7928e-02,  7.7928e-02,  7.7928e-02, -2.1756e-03,  7.7928e-02,
        -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,
         7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02,
        -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,
         7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02,
        -2.1756e-03,  7.7928e-02

Tokens embeddings:
tensor([ 3.3173e-02,  1.8188e-02,  5.0742e-02, -5.5578e-03,  8.2697e-02,
        -2.0249e-02,  8.3316e-02,  2.9773e-01,  1.2986e-01, -7.5436e-02,
         6.3056e-02, -9.8879e-03,  1.3483e-01,  6.7849e-03,  1.5339e-01,
         1.2945e-01,  1.7281e-01, -9.8375e-03,  5.0742e-02, -2.7676e-02,
         8.6060e-01,  3.2138e-01,  7.7928e-02, -2.1756e-03,  7.7928e-02,
        -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,
         7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02,
        -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02, -1.0000e+09,
         7.7928e-02,  7.7928e-02,  7.7928e-02, -2.1756e-03,  7.7928e-02,
        -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,
         7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02,
        -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,
         7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02,
        -2.1756e-03,  7.7928e-02

Tokens embeddings:
tensor([ 3.3173e-02,  8.2377e-02,  1.7152e-02,  8.6060e-01,  3.1968e-02,
        -1.6417e-02,  1.0113e-01,  4.7304e-02,  1.7046e-01,  2.3436e-02,
         2.4780e-01,  5.6074e-02,  8.6283e-02,  1.0056e-02,  5.6356e-03,
         6.8436e-02,  1.0145e+00,  6.0853e-02,  3.9571e-01, -2.0249e-02,
         5.0742e-02,  4.4187e-02,  5.0742e-02,  2.6259e-02,  2.4960e-02,
        -1.0000e+09,  2.9773e-01,  2.2890e-02,  8.6283e-02,  8.6060e-01,
         3.2138e-01, -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02,
         7.7928e-02,  7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,
         7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02,
        -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,
         7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02,
        -2.1756e-03,  7.7928e-02, -1.0000e+09,  7.7928e-02,  7.7928e-02,
         7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02,
        -2.1756e-03,  7.7928e-02

Tokens embeddings:
tensor([ 3.3173e-02,  8.2377e-02,  1.7152e-02,  8.6060e-01,  3.1968e-02,
        -1.6417e-02,  1.0113e-01,  4.7304e-02,  1.7046e-01,  2.3436e-02,
         2.4780e-01,  5.6074e-02,  8.6283e-02,  1.0056e-02,  5.6356e-03,
         6.8436e-02,  1.0145e+00,  6.0853e-02,  3.9571e-01, -2.0249e-02,
         5.0742e-02,  4.4187e-02,  5.0742e-02,  2.6259e-02,  2.4960e-02,
        -1.0000e+09,  2.9773e-01,  2.2890e-02,  8.6283e-02,  8.6060e-01,
         3.2138e-01, -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02,
         7.7928e-02,  7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,
         7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02,
        -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,
         7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02,
        -2.1756e-03,  7.7928e-02, -1.0000e+09,  7.7928e-02,  7.7928e-02,
         7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02,
        -2.1756e-03,  7.7928e-02

Tokens embeddings:
tensor([ 3.3173e-02,  8.2377e-02,  1.7152e-02,  8.6060e-01,  3.1968e-02,
        -1.6417e-02,  1.0113e-01,  4.7304e-02,  1.7046e-01,  2.3436e-02,
         2.4780e-01,  5.6074e-02,  8.6283e-02,  1.0056e-02,  5.6356e-03,
         6.8436e-02,  1.0145e+00,  6.0853e-02,  3.9571e-01, -2.0249e-02,
         5.0742e-02,  4.4187e-02,  5.0742e-02,  2.6259e-02,  2.4960e-02,
        -1.0000e+09,  2.9773e-01,  2.2890e-02,  8.6283e-02,  8.6060e-01,
         3.2138e-01, -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02,
         7.7928e-02,  7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,
         7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02,
        -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,
         7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02,
        -2.1756e-03,  7.7928e-02, -1.0000e+09,  7.7928e-02,  7.7928e-02,
         7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02,
        -2.1756e-03,  7.7928e-02

Tokens embeddings:
tensor([ 3.3173e-02,  8.2377e-02,  1.7152e-02,  8.6060e-01,  3.1968e-02,
        -1.6417e-02,  1.0113e-01,  4.7304e-02,  1.7046e-01,  2.3436e-02,
         2.4780e-01,  5.6074e-02,  8.6283e-02,  1.0056e-02,  5.6356e-03,
         6.8436e-02,  1.0145e+00,  6.0853e-02,  3.9571e-01, -2.0249e-02,
         5.0742e-02,  4.4187e-02,  5.0742e-02,  2.6259e-02,  2.4960e-02,
        -1.0000e+09,  2.9773e-01,  2.2890e-02,  8.6283e-02,  8.6060e-01,
         3.2138e-01, -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02,
         7.7928e-02,  7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,
         7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02,
        -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,
         7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02,
        -2.1756e-03,  7.7928e-02, -1.0000e+09,  7.7928e-02,  7.7928e-02,
         7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02,
        -2.1756e-03,  7.7928e-02

Tokens embeddings:
tensor([ 3.3173e-02,  8.2377e-02,  1.7152e-02,  8.6060e-01,  3.1968e-02,
        -1.6417e-02,  1.0113e-01,  4.7304e-02,  1.7046e-01,  2.3436e-02,
         2.4780e-01,  5.6074e-02,  8.6283e-02,  1.0056e-02,  5.6356e-03,
         6.8436e-02,  1.0145e+00,  6.0853e-02,  3.9571e-01, -2.0249e-02,
         5.0742e-02,  4.4187e-02,  5.0742e-02,  2.6259e-02,  2.4960e-02,
        -1.0000e+09,  2.9773e-01,  2.2890e-02,  8.6283e-02,  8.6060e-01,
         3.2138e-01, -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02,
         7.7928e-02,  7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,
         7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02,
        -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,
         7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02,
        -2.1756e-03,  7.7928e-02, -1.0000e+09,  7.7928e-02,  7.7928e-02,
         7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02,
        -2.1756e-03,  7.7928e-02

Tokens embeddings:
tensor([ 3.3173e-02,  8.2377e-02,  1.7152e-02,  8.6060e-01,  3.1968e-02,
        -1.6417e-02,  1.0113e-01,  4.7304e-02,  1.7046e-01,  2.3436e-02,
         2.4780e-01,  5.6074e-02,  8.6283e-02,  1.0056e-02,  5.6356e-03,
         6.8436e-02,  1.0145e+00,  6.0853e-02,  3.9571e-01, -2.0249e-02,
         5.0742e-02,  4.4187e-02,  5.0742e-02,  2.6259e-02,  2.4960e-02,
        -1.0000e+09,  2.9773e-01,  2.2890e-02,  8.6283e-02,  8.6060e-01,
         3.2138e-01, -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02,
         7.7928e-02,  7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,
         7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02,
        -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,
         7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02,
        -2.1756e-03,  7.7928e-02, -1.0000e+09,  7.7928e-02,  7.7928e-02,
         7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02,
        -2.1756e-03,  7.7928e-02

Tokens embeddings:
tensor([ 3.3173e-02,  8.2377e-02,  1.7152e-02,  8.6060e-01,  3.1968e-02,
        -1.6417e-02,  1.0113e-01,  4.7304e-02,  1.7046e-01,  2.3436e-02,
         2.4780e-01,  5.6074e-02,  8.6283e-02,  1.0056e-02,  5.6356e-03,
         6.8436e-02,  1.0145e+00,  6.0853e-02,  3.9571e-01, -2.0249e-02,
         5.0742e-02,  4.4187e-02,  5.0742e-02,  2.6259e-02,  2.4960e-02,
        -1.0000e+09,  2.9773e-01,  2.2890e-02,  8.6283e-02,  8.6060e-01,
         3.2138e-01, -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02,
         7.7928e-02,  7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,
         7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02,
        -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,
         7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02,
        -2.1756e-03,  7.7928e-02, -1.0000e+09,  7.7928e-02,  7.7928e-02,
         7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02,
        -2.1756e-03,  7.7928e-02

Tokens embeddings:
tensor([ 3.3173e-02,  8.2377e-02,  1.7152e-02,  8.6060e-01,  3.1968e-02,
        -1.6417e-02,  1.0113e-01,  4.7304e-02,  1.7046e-01,  2.3436e-02,
         2.4780e-01,  5.6074e-02,  8.6283e-02,  1.0056e-02,  5.6356e-03,
         6.8436e-02,  1.0145e+00,  6.0853e-02,  3.9571e-01, -2.0249e-02,
         5.0742e-02,  4.4187e-02,  5.0742e-02,  2.6259e-02,  2.4960e-02,
        -1.0000e+09,  2.9773e-01,  2.2890e-02,  8.6283e-02,  8.6060e-01,
         3.2138e-01, -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02,
         7.7928e-02,  7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,
         7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02,
        -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,
         7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02,
        -2.1756e-03,  7.7928e-02, -1.0000e+09,  7.7928e-02,  7.7928e-02,
         7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02,
        -2.1756e-03,  7.7928e-02

Tokens embeddings:
tensor([ 3.3173e-02,  8.2377e-02,  1.7152e-02,  8.6060e-01,  3.1968e-02,
        -1.6417e-02,  1.0113e-01,  4.7304e-02,  1.7046e-01,  2.3436e-02,
         2.4780e-01,  5.6074e-02,  8.6283e-02,  1.0056e-02,  5.6356e-03,
         6.8436e-02,  1.0145e+00,  6.0853e-02,  3.9571e-01, -2.0249e-02,
         5.0742e-02,  4.4187e-02,  5.0742e-02,  2.6259e-02,  2.4960e-02,
        -1.0000e+09,  2.9773e-01,  2.2890e-02,  8.6283e-02,  8.6060e-01,
         3.2138e-01, -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02,
         7.7928e-02,  7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,
         7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02,
        -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,
         7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02,
        -2.1756e-03,  7.7928e-02, -1.0000e+09,  7.7928e-02,  7.7928e-02,
         7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02,
        -2.1756e-03,  7.7928e-02

Tokens embeddings:
tensor([ 3.3173e-02,  8.2377e-02,  1.7152e-02,  8.6060e-01,  3.1968e-02,
        -1.6417e-02,  1.0113e-01,  4.7304e-02,  1.7046e-01,  2.3436e-02,
         2.4780e-01,  5.6074e-02,  8.6283e-02,  1.0056e-02,  5.6356e-03,
         6.8436e-02,  1.0145e+00,  6.0853e-02,  3.9571e-01, -2.0249e-02,
         5.0742e-02,  4.4187e-02,  5.0742e-02,  2.6259e-02,  2.4960e-02,
        -1.0000e+09,  2.9773e-01,  2.2890e-02,  8.6283e-02,  8.6060e-01,
         3.2138e-01, -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02,
         7.7928e-02,  7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,
         7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02,
        -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,
         7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02,
        -2.1756e-03,  7.7928e-02, -1.0000e+09,  7.7928e-02,  7.7928e-02,
         7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02,
        -2.1756e-03,  7.7928e-02

Tokens embeddings:
tensor([ 3.3173e-02,  8.2377e-02,  1.7152e-02,  8.6060e-01,  3.1968e-02,
        -1.6417e-02,  1.0113e-01,  4.7304e-02,  1.7046e-01,  2.3436e-02,
         2.4780e-01,  5.6074e-02,  8.6283e-02,  1.0056e-02,  5.6356e-03,
         6.8436e-02,  1.0145e+00,  6.0853e-02,  3.9571e-01, -2.0249e-02,
         5.0742e-02,  4.4187e-02,  5.0742e-02,  2.6259e-02,  2.4960e-02,
        -1.0000e+09,  2.9773e-01,  2.2890e-02,  8.6283e-02,  8.6060e-01,
         3.2138e-01, -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02,
         7.7928e-02,  7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,
         7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02,
        -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,
         7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02,
        -2.1756e-03,  7.7928e-02, -1.0000e+09,  7.7928e-02,  7.7928e-02,
         7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02,
        -2.1756e-03,  7.7928e-02

Tokens embeddings:
tensor([ 3.3173e-02,  8.2377e-02,  1.7152e-02,  8.6060e-01,  3.1968e-02,
        -1.6417e-02,  1.0113e-01,  4.7304e-02,  1.7046e-01,  2.3436e-02,
         2.4780e-01,  5.6074e-02,  8.6283e-02,  1.0056e-02,  5.6356e-03,
         6.8436e-02,  1.0145e+00,  6.0853e-02,  3.9571e-01, -2.0249e-02,
         5.0742e-02,  4.4187e-02,  5.0742e-02,  2.6259e-02,  2.4960e-02,
        -1.0000e+09,  2.9773e-01,  2.2890e-02,  8.6283e-02,  8.6060e-01,
         3.2138e-01, -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02,
         7.7928e-02,  7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,
         7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02,
        -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,
         7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02,
        -2.1756e-03,  7.7928e-02, -1.0000e+09,  7.7928e-02,  7.7928e-02,
         7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02,
        -2.1756e-03,  7.7928e-02

Tokens embeddings:
tensor([ 3.3173e-02,  1.7152e-02,  8.6060e-01,  4.8389e-02,  6.8220e-03,
         8.0457e-01, -3.1070e-02,  2.9773e-01,  8.2305e-02,  4.0926e-01,
         1.2018e-01,  1.3678e-01,  1.0208e-01,  6.5843e-03,  2.6615e-01,
        -5.2178e-02,  3.0555e-01, -5.4577e-02,  4.4381e-02,  1.4744e+00,
         8.6283e-02,  1.9441e-02,  3.0836e-02, -1.4800e-02,  9.8029e-01,
         6.0246e-01,  1.0867e-01,  3.2846e-01,  6.8924e-02, -1.9783e-02,
         4.0926e-01,  1.2945e-01,  2.9773e-01,  8.9674e-02,  4.4977e-01,
         7.4060e-03,  2.9773e-01,  1.0208e-01,  5.0742e-02,  4.1328e-01,
         3.1152e-01, -1.0000e+09,  2.5623e-01,  8.6283e-02,  3.6311e-02,
        -1.9811e-02,  1.5625e-01, -3.1070e-02,  8.6283e-02,  8.6060e-01,
         3.2138e-01, -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02,
        -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,
         7.7928e-02,  7.7928e-02,  7.7928e-02, -2.1756e-03,  7.7928e-02,
        -2.1756e-03,  7.7928e-02

Tokens embeddings:
tensor([ 3.3173e-02,  1.7152e-02,  8.6060e-01,  4.8389e-02,  6.8220e-03,
         8.0457e-01, -3.1070e-02,  2.9773e-01,  8.2305e-02,  4.0926e-01,
         1.2018e-01,  1.3678e-01,  1.0208e-01,  6.5843e-03,  2.6615e-01,
        -5.2178e-02,  3.0555e-01, -5.4577e-02,  4.4381e-02,  1.4744e+00,
         8.6283e-02,  1.9441e-02,  3.0836e-02, -1.4800e-02,  9.8029e-01,
         6.0246e-01,  1.0867e-01,  3.2846e-01,  6.8924e-02, -1.9783e-02,
         4.0926e-01,  1.2945e-01,  2.9773e-01,  8.9674e-02,  4.4977e-01,
         7.4060e-03,  2.9773e-01,  1.0208e-01,  5.0742e-02,  4.1328e-01,
         3.1152e-01, -1.0000e+09,  2.5623e-01,  8.6283e-02,  3.6311e-02,
        -1.9811e-02,  1.5625e-01, -3.1070e-02,  8.6283e-02,  8.6060e-01,
         3.2138e-01, -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02,
        -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,
         7.7928e-02,  7.7928e-02,  7.7928e-02, -2.1756e-03,  7.7928e-02,
        -2.1756e-03,  7.7928e-02

Tokens embeddings:
tensor([ 3.3173e-02,  1.7152e-02,  8.6060e-01,  4.8389e-02,  6.8220e-03,
         8.0457e-01, -3.1070e-02,  2.9773e-01,  8.2305e-02,  4.0926e-01,
         1.2018e-01,  1.3678e-01,  1.0208e-01,  6.5843e-03,  2.6615e-01,
        -5.2178e-02,  3.0555e-01, -5.4577e-02,  4.4381e-02,  1.4744e+00,
         8.6283e-02,  1.9441e-02,  3.0836e-02, -1.4800e-02,  9.8029e-01,
         6.0246e-01,  1.0867e-01,  3.2846e-01,  6.8924e-02, -1.9783e-02,
         4.0926e-01,  1.2945e-01,  2.9773e-01,  8.9674e-02,  4.4977e-01,
         7.4060e-03,  2.9773e-01,  1.0208e-01,  5.0742e-02,  4.1328e-01,
         3.1152e-01, -1.0000e+09,  2.5623e-01,  8.6283e-02,  3.6311e-02,
        -1.9811e-02,  1.5625e-01, -3.1070e-02,  8.6283e-02,  8.6060e-01,
         3.2138e-01, -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02,
        -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,
         7.7928e-02,  7.7928e-02,  7.7928e-02, -2.1756e-03,  7.7928e-02,
        -2.1756e-03,  7.7928e-02

Tokens embeddings:
tensor([ 3.3173e-02,  1.7152e-02,  8.6060e-01,  4.8389e-02,  6.8220e-03,
         8.0457e-01, -3.1070e-02,  2.9773e-01,  8.2305e-02,  4.0926e-01,
         1.2018e-01,  1.3678e-01,  1.0208e-01,  6.5843e-03,  2.6615e-01,
        -5.2178e-02,  3.0555e-01, -5.4577e-02,  4.4381e-02,  1.4744e+00,
         8.6283e-02,  1.9441e-02,  3.0836e-02, -1.4800e-02,  9.8029e-01,
         6.0246e-01,  1.0867e-01,  3.2846e-01,  6.8924e-02, -1.9783e-02,
         4.0926e-01,  1.2945e-01,  2.9773e-01,  8.9674e-02,  4.4977e-01,
         7.4060e-03,  2.9773e-01,  1.0208e-01,  5.0742e-02,  4.1328e-01,
         3.1152e-01, -1.0000e+09,  2.5623e-01,  8.6283e-02,  3.6311e-02,
        -1.9811e-02,  1.5625e-01, -3.1070e-02,  8.6283e-02,  8.6060e-01,
         3.2138e-01, -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02,
        -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,
         7.7928e-02,  7.7928e-02,  7.7928e-02, -2.1756e-03,  7.7928e-02,
        -2.1756e-03,  7.7928e-02

Tokens embeddings:
tensor([ 3.3173e-02,  1.7152e-02,  8.6060e-01,  4.8389e-02,  6.8220e-03,
         8.0457e-01, -3.1070e-02,  2.9773e-01,  8.2305e-02,  4.0926e-01,
         1.2018e-01,  1.3678e-01,  1.0208e-01,  6.5843e-03,  2.6615e-01,
        -5.2178e-02,  3.0555e-01, -5.4577e-02,  4.4381e-02,  1.4744e+00,
         8.6283e-02,  1.9441e-02,  3.0836e-02, -1.4800e-02,  9.8029e-01,
         6.0246e-01,  1.0867e-01,  3.2846e-01,  6.8924e-02, -1.9783e-02,
         4.0926e-01,  1.2945e-01,  2.9773e-01,  8.9674e-02,  4.4977e-01,
         7.4060e-03,  2.9773e-01,  1.0208e-01,  5.0742e-02,  4.1328e-01,
         3.1152e-01, -1.0000e+09,  2.5623e-01,  8.6283e-02,  3.6311e-02,
        -1.9811e-02,  1.5625e-01, -3.1070e-02,  8.6283e-02,  8.6060e-01,
         3.2138e-01, -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02,
        -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,
         7.7928e-02,  7.7928e-02,  7.7928e-02, -2.1756e-03,  7.7928e-02,
        -2.1756e-03,  7.7928e-02

Tokens embeddings:
tensor([ 3.3173e-02,  1.7152e-02,  8.6060e-01,  4.8389e-02,  6.8220e-03,
         8.0457e-01, -3.1070e-02,  2.9773e-01,  8.2305e-02,  4.0926e-01,
         1.2018e-01,  1.3678e-01,  1.0208e-01,  6.5843e-03,  2.6615e-01,
        -5.2178e-02,  3.0555e-01, -5.4577e-02,  4.4381e-02,  1.4744e+00,
         8.6283e-02,  1.9441e-02,  3.0836e-02, -1.4800e-02,  9.8029e-01,
         6.0246e-01,  1.0867e-01,  3.2846e-01,  6.8924e-02, -1.9783e-02,
         4.0926e-01,  1.2945e-01,  2.9773e-01,  8.9674e-02,  4.4977e-01,
         7.4060e-03,  2.9773e-01,  1.0208e-01,  5.0742e-02,  4.1328e-01,
         3.1152e-01, -1.0000e+09,  2.5623e-01,  8.6283e-02,  3.6311e-02,
        -1.9811e-02,  1.5625e-01, -3.1070e-02,  8.6283e-02,  8.6060e-01,
         3.2138e-01, -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02,
        -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,
         7.7928e-02,  7.7928e-02,  7.7928e-02, -2.1756e-03,  7.7928e-02,
        -2.1756e-03,  7.7928e-02

Tokens embeddings:
tensor([ 3.3173e-02,  1.7152e-02,  8.6060e-01,  4.8389e-02,  6.8220e-03,
         8.0457e-01, -3.1070e-02,  2.9773e-01,  8.2305e-02,  4.0926e-01,
         1.2018e-01,  1.3678e-01,  1.0208e-01,  6.5843e-03,  2.6615e-01,
        -5.2178e-02,  3.0555e-01, -5.4577e-02,  4.4381e-02,  1.4744e+00,
         8.6283e-02,  1.9441e-02,  3.0836e-02, -1.4800e-02,  9.8029e-01,
         6.0246e-01,  1.0867e-01,  3.2846e-01,  6.8924e-02, -1.9783e-02,
         4.0926e-01,  1.2945e-01,  2.9773e-01,  8.9674e-02,  4.4977e-01,
         7.4060e-03,  2.9773e-01,  1.0208e-01,  5.0742e-02,  4.1328e-01,
         3.1152e-01, -1.0000e+09,  2.5623e-01,  8.6283e-02,  3.6311e-02,
        -1.9811e-02,  1.5625e-01, -3.1070e-02,  8.6283e-02,  8.6060e-01,
         3.2138e-01, -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02,
        -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,
         7.7928e-02,  7.7928e-02,  7.7928e-02, -2.1756e-03,  7.7928e-02,
        -2.1756e-03,  7.7928e-02

Tokens embeddings:
tensor([ 3.3173e-02,  1.7152e-02,  8.6060e-01,  4.8389e-02,  6.8220e-03,
         8.0457e-01, -3.1070e-02,  2.9773e-01,  8.2305e-02,  4.0926e-01,
         1.2018e-01,  1.3678e-01,  1.0208e-01,  6.5843e-03,  2.6615e-01,
        -5.2178e-02,  3.0555e-01, -5.4577e-02,  4.4381e-02,  1.4744e+00,
         8.6283e-02,  1.9441e-02,  3.0836e-02, -1.4800e-02,  9.8029e-01,
         6.0246e-01,  1.0867e-01,  3.2846e-01,  6.8924e-02, -1.9783e-02,
         4.0926e-01,  1.2945e-01,  2.9773e-01,  8.9674e-02,  4.4977e-01,
         7.4060e-03,  2.9773e-01,  1.0208e-01,  5.0742e-02,  4.1328e-01,
         3.1152e-01, -1.0000e+09,  2.5623e-01,  8.6283e-02,  3.6311e-02,
        -1.9811e-02,  1.5625e-01, -3.1070e-02,  8.6283e-02,  8.6060e-01,
         3.2138e-01, -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02,
        -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,
         7.7928e-02,  7.7928e-02,  7.7928e-02, -2.1756e-03,  7.7928e-02,
        -2.1756e-03,  7.7928e-02

Tokens embeddings:
tensor([ 3.3173e-02,  1.7152e-02,  8.6060e-01,  4.8389e-02,  6.8220e-03,
         8.0457e-01, -3.1070e-02,  2.9773e-01,  8.2305e-02,  4.0926e-01,
         1.2018e-01,  1.3678e-01,  1.0208e-01,  6.5843e-03,  2.6615e-01,
        -5.2178e-02,  3.0555e-01, -5.4577e-02,  4.4381e-02,  1.4744e+00,
         8.6283e-02,  1.9441e-02,  3.0836e-02, -1.4800e-02,  9.8029e-01,
         6.0246e-01,  1.0867e-01,  3.2846e-01,  6.8924e-02, -1.9783e-02,
         4.0926e-01,  1.2945e-01,  2.9773e-01,  8.9674e-02,  4.4977e-01,
         7.4060e-03,  2.9773e-01,  1.0208e-01,  5.0742e-02,  4.1328e-01,
         3.1152e-01, -1.0000e+09,  2.5623e-01,  8.6283e-02,  3.6311e-02,
        -1.9811e-02,  1.5625e-01, -3.1070e-02,  8.6283e-02,  8.6060e-01,
         3.2138e-01, -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02,
        -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,
         7.7928e-02,  7.7928e-02,  7.7928e-02, -2.1756e-03,  7.7928e-02,
        -2.1756e-03,  7.7928e-02

Tokens embeddings:
tensor([ 3.3173e-02,  1.7152e-02,  8.6060e-01,  4.8389e-02,  6.8220e-03,
         8.0457e-01, -3.1070e-02,  2.9773e-01,  8.2305e-02,  4.0926e-01,
         1.2018e-01,  1.3678e-01,  1.0208e-01,  6.5843e-03,  2.6615e-01,
        -5.2178e-02,  3.0555e-01, -5.4577e-02,  4.4381e-02,  1.4744e+00,
         8.6283e-02,  1.9441e-02,  3.0836e-02, -1.4800e-02,  9.8029e-01,
         6.0246e-01,  1.0867e-01,  3.2846e-01,  6.8924e-02, -1.9783e-02,
         4.0926e-01,  1.2945e-01,  2.9773e-01,  8.9674e-02,  4.4977e-01,
         7.4060e-03,  2.9773e-01,  1.0208e-01,  5.0742e-02,  4.1328e-01,
         3.1152e-01, -1.0000e+09,  2.5623e-01,  8.6283e-02,  3.6311e-02,
        -1.9811e-02,  1.5625e-01, -3.1070e-02,  8.6283e-02,  8.6060e-01,
         3.2138e-01, -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02,
        -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,
         7.7928e-02,  7.7928e-02,  7.7928e-02, -2.1756e-03,  7.7928e-02,
        -2.1756e-03,  7.7928e-02

Tokens embeddings:
tensor([ 3.3173e-02,  1.7152e-02,  8.6060e-01,  4.8389e-02,  6.8220e-03,
         8.0457e-01, -3.1070e-02,  2.9773e-01,  8.2305e-02,  4.0926e-01,
         1.2018e-01,  1.3678e-01,  1.0208e-01,  6.5843e-03,  2.6615e-01,
        -5.2178e-02,  3.0555e-01, -5.4577e-02,  4.4381e-02,  1.4744e+00,
         8.6283e-02,  1.9441e-02,  3.0836e-02, -1.4800e-02,  9.8029e-01,
         6.0246e-01,  1.0867e-01,  3.2846e-01,  6.8924e-02, -1.9783e-02,
         4.0926e-01,  1.2945e-01,  2.9773e-01,  8.9674e-02,  4.4977e-01,
         7.4060e-03,  2.9773e-01,  1.0208e-01,  5.0742e-02,  4.1328e-01,
         3.1152e-01, -1.0000e+09,  2.5623e-01,  8.6283e-02,  3.6311e-02,
        -1.9811e-02,  1.5625e-01, -3.1070e-02,  8.6283e-02,  8.6060e-01,
         3.2138e-01, -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02,
        -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,
         7.7928e-02,  7.7928e-02,  7.7928e-02, -2.1756e-03,  7.7928e-02,
        -2.1756e-03,  7.7928e-02

Tokens embeddings:
tensor([ 3.3173e-02,  1.7152e-02,  8.6060e-01,  4.8389e-02,  6.8220e-03,
         8.0457e-01, -3.1070e-02,  2.9773e-01,  8.2305e-02,  4.0926e-01,
         1.2018e-01,  1.3678e-01,  1.0208e-01,  6.5843e-03,  2.6615e-01,
        -5.2178e-02,  3.0555e-01, -5.4577e-02,  4.4381e-02,  1.4744e+00,
         8.6283e-02,  1.9441e-02,  3.0836e-02, -1.4800e-02,  9.8029e-01,
         6.0246e-01,  1.0867e-01,  3.2846e-01,  6.8924e-02, -1.9783e-02,
         4.0926e-01,  1.2945e-01,  2.9773e-01,  8.9674e-02,  4.4977e-01,
         7.4060e-03,  2.9773e-01,  1.0208e-01,  5.0742e-02,  4.1328e-01,
         3.1152e-01, -1.0000e+09,  2.5623e-01,  8.6283e-02,  3.6311e-02,
        -1.9811e-02,  1.5625e-01, -3.1070e-02,  8.6283e-02,  8.6060e-01,
         3.2138e-01, -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02,
        -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,
         7.7928e-02,  7.7928e-02,  7.7928e-02, -2.1756e-03,  7.7928e-02,
        -2.1756e-03,  7.7928e-02

Tokens embeddings:
tensor([ 3.3173e-02, -1.9409e-01,  5.7563e-02,  4.2367e-01,  3.6949e-01,
        -2.1518e-03,  8.6283e-02,  1.0056e-02,  3.4703e-01,  2.8219e-01,
         5.0742e-02,  1.1194e+00,  3.1739e-02, -1.0000e+09,  8.6283e-02,
         5.5860e-02,  4.2570e-01,  2.5738e-01,  3.2155e-01,  1.4615e-03,
         8.6283e-02,  1.0056e-02,  2.2431e-02, -2.4761e-03,  6.3272e-02,
         2.1402e-01,  1.9388e-01,  1.3872e-01,  4.7042e-01, -1.0000e+09,
         3.6311e-02,  1.4934e-02,  3.3151e-02,  3.7159e-01,  3.1844e-01,
         4.7042e-01,  6.5579e-02, -2.7676e-02,  8.6060e-01,  3.2138e-01,
         7.7928e-02, -1.0000e+09,  7.7928e-02,  7.7928e-02,  7.7928e-02,
        -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,
         7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02,
        -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02, -1.0000e+09,
         7.7928e-02,  7.7928e-02,  7.7928e-02, -2.1756e-03,  7.7928e-02,
        -2.1756e-03,  7.7928e-02

Tokens embeddings:
tensor([ 3.3173e-02, -1.9409e-01,  5.7563e-02,  4.2367e-01,  3.6949e-01,
        -2.1518e-03,  8.6283e-02,  1.0056e-02,  3.4703e-01,  2.8219e-01,
         5.0742e-02,  1.1194e+00,  3.1739e-02, -1.0000e+09,  8.6283e-02,
         5.5860e-02,  4.2570e-01,  2.5738e-01,  3.2155e-01,  1.4615e-03,
         8.6283e-02,  1.0056e-02,  2.2431e-02, -2.4761e-03,  6.3272e-02,
         2.1402e-01,  1.9388e-01,  1.3872e-01,  4.7042e-01, -1.0000e+09,
         3.6311e-02,  1.4934e-02,  3.3151e-02,  3.7159e-01,  3.1844e-01,
         4.7042e-01,  6.5579e-02, -2.7676e-02,  8.6060e-01,  3.2138e-01,
         7.7928e-02, -1.0000e+09,  7.7928e-02,  7.7928e-02,  7.7928e-02,
        -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,
         7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02,
        -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02, -1.0000e+09,
         7.7928e-02,  7.7928e-02,  7.7928e-02, -2.1756e-03,  7.7928e-02,
        -2.1756e-03,  7.7928e-02

Tokens embeddings:
tensor([ 3.3173e-02, -1.9409e-01,  5.7563e-02,  4.2367e-01,  3.6949e-01,
        -2.1518e-03,  8.6283e-02,  1.0056e-02,  3.4703e-01,  2.8219e-01,
         5.0742e-02,  1.1194e+00,  3.1739e-02, -1.0000e+09,  8.6283e-02,
         5.5860e-02,  4.2570e-01,  2.5738e-01,  3.2155e-01,  1.4615e-03,
         8.6283e-02,  1.0056e-02,  2.2431e-02, -2.4761e-03,  6.3272e-02,
         2.1402e-01,  1.9388e-01,  1.3872e-01,  4.7042e-01, -1.0000e+09,
         3.6311e-02,  1.4934e-02,  3.3151e-02,  3.7159e-01,  3.1844e-01,
         4.7042e-01,  6.5579e-02, -2.7676e-02,  8.6060e-01,  3.2138e-01,
         7.7928e-02, -1.0000e+09,  7.7928e-02,  7.7928e-02,  7.7928e-02,
        -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,
         7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02,
        -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02, -1.0000e+09,
         7.7928e-02,  7.7928e-02,  7.7928e-02, -2.1756e-03,  7.7928e-02,
        -2.1756e-03,  7.7928e-02

Tokens embeddings:
tensor([ 3.3173e-02, -1.9409e-01,  5.7563e-02,  4.2367e-01,  3.6949e-01,
        -2.1518e-03,  8.6283e-02,  1.0056e-02,  3.4703e-01,  2.8219e-01,
         5.0742e-02,  1.1194e+00,  3.1739e-02, -1.0000e+09,  8.6283e-02,
         5.5860e-02,  4.2570e-01,  2.5738e-01,  3.2155e-01,  1.4615e-03,
         8.6283e-02,  1.0056e-02,  2.2431e-02, -2.4761e-03,  6.3272e-02,
         2.1402e-01,  1.9388e-01,  1.3872e-01,  4.7042e-01, -1.0000e+09,
         3.6311e-02,  1.4934e-02,  3.3151e-02,  3.7159e-01,  3.1844e-01,
         4.7042e-01,  6.5579e-02, -2.7676e-02,  8.6060e-01,  3.2138e-01,
         7.7928e-02, -1.0000e+09,  7.7928e-02,  7.7928e-02,  7.7928e-02,
        -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,
         7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02,
        -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02, -1.0000e+09,
         7.7928e-02,  7.7928e-02,  7.7928e-02, -2.1756e-03,  7.7928e-02,
        -2.1756e-03,  7.7928e-02

Tokens embeddings:
tensor([ 3.3173e-02, -1.9409e-01,  5.7563e-02,  4.2367e-01,  3.6949e-01,
        -2.1518e-03,  8.6283e-02,  1.0056e-02,  3.4703e-01,  2.8219e-01,
         5.0742e-02,  1.1194e+00,  3.1739e-02, -1.0000e+09,  8.6283e-02,
         5.5860e-02,  4.2570e-01,  2.5738e-01,  3.2155e-01,  1.4615e-03,
         8.6283e-02,  1.0056e-02,  2.2431e-02, -2.4761e-03,  6.3272e-02,
         2.1402e-01,  1.9388e-01,  1.3872e-01,  4.7042e-01, -1.0000e+09,
         3.6311e-02,  1.4934e-02,  3.3151e-02,  3.7159e-01,  3.1844e-01,
         4.7042e-01,  6.5579e-02, -2.7676e-02,  8.6060e-01,  3.2138e-01,
         7.7928e-02, -1.0000e+09,  7.7928e-02,  7.7928e-02,  7.7928e-02,
        -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,
         7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02,
        -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02, -1.0000e+09,
         7.7928e-02,  7.7928e-02,  7.7928e-02, -2.1756e-03,  7.7928e-02,
        -2.1756e-03,  7.7928e-02

Tokens embeddings:
tensor([ 3.3173e-02, -1.9409e-01,  5.7563e-02,  4.2367e-01,  3.6949e-01,
        -2.1518e-03,  8.6283e-02,  1.0056e-02,  3.4703e-01,  2.8219e-01,
         5.0742e-02,  1.1194e+00,  3.1739e-02, -1.0000e+09,  8.6283e-02,
         5.5860e-02,  4.2570e-01,  2.5738e-01,  3.2155e-01,  1.4615e-03,
         8.6283e-02,  1.0056e-02,  2.2431e-02, -2.4761e-03,  6.3272e-02,
         2.1402e-01,  1.9388e-01,  1.3872e-01,  4.7042e-01, -1.0000e+09,
         3.6311e-02,  1.4934e-02,  3.3151e-02,  3.7159e-01,  3.1844e-01,
         4.7042e-01,  6.5579e-02, -2.7676e-02,  8.6060e-01,  3.2138e-01,
         7.7928e-02, -1.0000e+09,  7.7928e-02,  7.7928e-02,  7.7928e-02,
        -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,
         7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02,
        -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02, -1.0000e+09,
         7.7928e-02,  7.7928e-02,  7.7928e-02, -2.1756e-03,  7.7928e-02,
        -2.1756e-03,  7.7928e-02

Tokens embeddings:
tensor([ 3.3173e-02, -1.9409e-01,  5.7563e-02,  4.2367e-01,  3.6949e-01,
        -2.1518e-03,  8.6283e-02,  1.0056e-02,  3.4703e-01,  2.8219e-01,
         5.0742e-02,  1.1194e+00,  3.1739e-02, -1.0000e+09,  8.6283e-02,
         5.5860e-02,  4.2570e-01,  2.5738e-01,  3.2155e-01,  1.4615e-03,
         8.6283e-02,  1.0056e-02,  2.2431e-02, -2.4761e-03,  6.3272e-02,
         2.1402e-01,  1.9388e-01,  1.3872e-01,  4.7042e-01, -1.0000e+09,
         3.6311e-02,  1.4934e-02,  3.3151e-02,  3.7159e-01,  3.1844e-01,
         4.7042e-01,  6.5579e-02, -2.7676e-02,  8.6060e-01,  3.2138e-01,
         7.7928e-02, -1.0000e+09,  7.7928e-02,  7.7928e-02,  7.7928e-02,
        -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,
         7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02,
        -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02, -1.0000e+09,
         7.7928e-02,  7.7928e-02,  7.7928e-02, -2.1756e-03,  7.7928e-02,
        -2.1756e-03,  7.7928e-02

Tokens embeddings:
tensor([ 3.3173e-02, -1.9409e-01,  5.7563e-02,  4.2367e-01,  3.6949e-01,
        -2.1518e-03,  8.6283e-02,  1.0056e-02,  3.4703e-01,  2.8219e-01,
         5.0742e-02,  1.1194e+00,  3.1739e-02, -1.0000e+09,  8.6283e-02,
         5.5860e-02,  4.2570e-01,  2.5738e-01,  3.2155e-01,  1.4615e-03,
         8.6283e-02,  1.0056e-02,  2.2431e-02, -2.4761e-03,  6.3272e-02,
         2.1402e-01,  1.9388e-01,  1.3872e-01,  4.7042e-01, -1.0000e+09,
         3.6311e-02,  1.4934e-02,  3.3151e-02,  3.7159e-01,  3.1844e-01,
         4.7042e-01,  6.5579e-02, -2.7676e-02,  8.6060e-01,  3.2138e-01,
         7.7928e-02, -1.0000e+09,  7.7928e-02,  7.7928e-02,  7.7928e-02,
        -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,
         7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02,
        -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02, -1.0000e+09,
         7.7928e-02,  7.7928e-02,  7.7928e-02, -2.1756e-03,  7.7928e-02,
        -2.1756e-03,  7.7928e-02

Tokens embeddings:
tensor([ 3.3173e-02, -1.9409e-01,  5.7563e-02,  4.2367e-01,  3.6949e-01,
        -2.1518e-03,  8.6283e-02,  1.0056e-02,  3.4703e-01,  2.8219e-01,
         5.0742e-02,  1.1194e+00,  3.1739e-02, -1.0000e+09,  8.6283e-02,
         5.5860e-02,  4.2570e-01,  2.5738e-01,  3.2155e-01,  1.4615e-03,
         8.6283e-02,  1.0056e-02,  2.2431e-02, -2.4761e-03,  6.3272e-02,
         2.1402e-01,  1.9388e-01,  1.3872e-01,  4.7042e-01, -1.0000e+09,
         3.6311e-02,  1.4934e-02,  3.3151e-02,  3.7159e-01,  3.1844e-01,
         4.7042e-01,  6.5579e-02, -2.7676e-02,  8.6060e-01,  3.2138e-01,
         7.7928e-02, -1.0000e+09,  7.7928e-02,  7.7928e-02,  7.7928e-02,
        -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,
         7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02,
        -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02, -1.0000e+09,
         7.7928e-02,  7.7928e-02,  7.7928e-02, -2.1756e-03,  7.7928e-02,
        -2.1756e-03,  7.7928e-02

Tokens embeddings:
tensor([ 3.3173e-02, -1.9409e-01,  5.7563e-02,  4.2367e-01,  3.6949e-01,
        -2.1518e-03,  8.6283e-02,  1.0056e-02,  3.4703e-01,  2.8219e-01,
         5.0742e-02,  1.1194e+00,  3.1739e-02, -1.0000e+09,  8.6283e-02,
         5.5860e-02,  4.2570e-01,  2.5738e-01,  3.2155e-01,  1.4615e-03,
         8.6283e-02,  1.0056e-02,  2.2431e-02, -2.4761e-03,  6.3272e-02,
         2.1402e-01,  1.9388e-01,  1.3872e-01,  4.7042e-01, -1.0000e+09,
         3.6311e-02,  1.4934e-02,  3.3151e-02,  3.7159e-01,  3.1844e-01,
         4.7042e-01,  6.5579e-02, -2.7676e-02,  8.6060e-01,  3.2138e-01,
         7.7928e-02, -1.0000e+09,  7.7928e-02,  7.7928e-02,  7.7928e-02,
        -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,
         7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02,
        -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02, -1.0000e+09,
         7.7928e-02,  7.7928e-02,  7.7928e-02, -2.1756e-03,  7.7928e-02,
        -2.1756e-03,  7.7928e-02

Tokens embeddings:
tensor([ 3.3173e-02, -1.9409e-01,  5.7563e-02,  4.2367e-01,  3.6949e-01,
        -2.1518e-03,  8.6283e-02,  1.0056e-02,  3.4703e-01,  2.8219e-01,
         5.0742e-02,  1.1194e+00,  3.1739e-02, -1.0000e+09,  8.6283e-02,
         5.5860e-02,  4.2570e-01,  2.5738e-01,  3.2155e-01,  1.4615e-03,
         8.6283e-02,  1.0056e-02,  2.2431e-02, -2.4761e-03,  6.3272e-02,
         2.1402e-01,  1.9388e-01,  1.3872e-01,  4.7042e-01, -1.0000e+09,
         3.6311e-02,  1.4934e-02,  3.3151e-02,  3.7159e-01,  3.1844e-01,
         4.7042e-01,  6.5579e-02, -2.7676e-02,  8.6060e-01,  3.2138e-01,
         7.7928e-02, -1.0000e+09,  7.7928e-02,  7.7928e-02,  7.7928e-02,
        -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,
         7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02,
        -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02, -1.0000e+09,
         7.7928e-02,  7.7928e-02,  7.7928e-02, -2.1756e-03,  7.7928e-02,
        -2.1756e-03,  7.7928e-02

Tokens embeddings:
tensor([ 3.3173e-02, -1.9409e-01,  5.7563e-02,  4.2367e-01,  3.6949e-01,
        -2.1518e-03,  8.6283e-02,  1.0056e-02,  3.4703e-01,  2.8219e-01,
         5.0742e-02,  1.1194e+00,  3.1739e-02, -1.0000e+09,  8.6283e-02,
         5.5860e-02,  4.2570e-01,  2.5738e-01,  3.2155e-01,  1.4615e-03,
         8.6283e-02,  1.0056e-02,  2.2431e-02, -2.4761e-03,  6.3272e-02,
         2.1402e-01,  1.9388e-01,  1.3872e-01,  4.7042e-01, -1.0000e+09,
         3.6311e-02,  1.4934e-02,  3.3151e-02,  3.7159e-01,  3.1844e-01,
         4.7042e-01,  6.5579e-02, -2.7676e-02,  8.6060e-01,  3.2138e-01,
         7.7928e-02, -1.0000e+09,  7.7928e-02,  7.7928e-02,  7.7928e-02,
        -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,
         7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02,
        -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02, -1.0000e+09,
         7.7928e-02,  7.7928e-02,  7.7928e-02, -2.1756e-03,  7.7928e-02,
        -2.1756e-03,  7.7928e-02

Tokens embeddings:
tensor([ 3.3173e-02,  5.7358e-01,  6.6538e-02,  2.9113e-01,  8.6283e-02,
         6.6489e-03,  3.2261e-02,  4.3390e-02,  5.0742e-02, -1.0000e+09,
         2.9773e-01,  2.6645e-02,  8.6283e-02,  8.6060e-01,  3.2138e-01,
        -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,
         7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02,
        -1.0000e+09,  7.7928e-02,  7.7928e-02,  7.7928e-02, -2.1756e-03,
         7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02,
        -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,
         7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02,
        -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,
         7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02,
        -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,
         7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02,
        -2.1756e-03,  7.7928e-02

Tokens embeddings:
tensor([ 3.3173e-02,  5.7358e-01,  6.6538e-02,  2.9113e-01,  8.6283e-02,
         6.6489e-03,  3.2261e-02,  4.3390e-02,  5.0742e-02, -1.0000e+09,
         2.9773e-01,  2.6645e-02,  8.6283e-02,  8.6060e-01,  3.2138e-01,
        -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,
         7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02,
        -1.0000e+09,  7.7928e-02,  7.7928e-02,  7.7928e-02, -2.1756e-03,
         7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02,
        -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,
         7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02,
        -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,
         7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02,
        -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,
         7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02,
        -2.1756e-03,  7.7928e-02

Tokens embeddings:
tensor([ 3.3173e-02,  5.7358e-01,  6.6538e-02,  2.9113e-01,  8.6283e-02,
         6.6489e-03,  3.2261e-02,  4.3390e-02,  5.0742e-02, -1.0000e+09,
         2.9773e-01,  2.6645e-02,  8.6283e-02,  8.6060e-01,  3.2138e-01,
        -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,
         7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02,
        -1.0000e+09,  7.7928e-02,  7.7928e-02,  7.7928e-02, -2.1756e-03,
         7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02,
        -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,
         7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02,
        -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,
         7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02,
        -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,
         7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02,
        -2.1756e-03,  7.7928e-02

Tokens embeddings:
tensor([ 3.3173e-02,  5.7358e-01,  6.6538e-02,  2.9113e-01,  8.6283e-02,
         6.6489e-03,  3.2261e-02,  4.3390e-02,  5.0742e-02, -1.0000e+09,
         2.9773e-01,  2.6645e-02,  8.6283e-02,  8.6060e-01,  3.2138e-01,
        -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,
         7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02,
        -1.0000e+09,  7.7928e-02,  7.7928e-02,  7.7928e-02, -2.1756e-03,
         7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02,
        -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,
         7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02,
        -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,
         7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02,
        -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,
         7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02,
        -2.1756e-03,  7.7928e-02

Tokens embeddings:
tensor([ 3.3173e-02,  5.7358e-01,  6.6538e-02,  2.9113e-01,  8.6283e-02,
         6.6489e-03,  3.2261e-02,  4.3390e-02,  5.0742e-02, -1.0000e+09,
         2.9773e-01,  2.6645e-02,  8.6283e-02,  8.6060e-01,  3.2138e-01,
        -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,
         7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02,
        -1.0000e+09,  7.7928e-02,  7.7928e-02,  7.7928e-02, -2.1756e-03,
         7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02,
        -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,
         7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02,
        -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,
         7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02,
        -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,
         7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02,
        -2.1756e-03,  7.7928e-02

Tokens embeddings:
tensor([ 3.3173e-02,  5.7358e-01,  6.6538e-02,  2.9113e-01,  8.6283e-02,
         6.6489e-03,  3.2261e-02,  4.3390e-02,  5.0742e-02, -1.0000e+09,
         2.9773e-01,  2.6645e-02,  8.6283e-02,  8.6060e-01,  3.2138e-01,
        -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,
         7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02,
        -1.0000e+09,  7.7928e-02,  7.7928e-02,  7.7928e-02, -2.1756e-03,
         7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02,
        -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,
         7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02,
        -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,
         7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02,
        -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,
         7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02,
        -2.1756e-03,  7.7928e-02

Tokens embeddings:
tensor([ 3.3173e-02,  5.7358e-01,  6.6538e-02,  2.9113e-01,  8.6283e-02,
         6.6489e-03,  3.2261e-02,  4.3390e-02,  5.0742e-02, -1.0000e+09,
         2.9773e-01,  2.6645e-02,  8.6283e-02,  8.6060e-01,  3.2138e-01,
        -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,
         7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02,
        -1.0000e+09,  7.7928e-02,  7.7928e-02,  7.7928e-02, -2.1756e-03,
         7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02,
        -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,
         7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02,
        -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,
         7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02,
        -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,
         7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02,
        -2.1756e-03,  7.7928e-02

Tokens embeddings:
tensor([ 3.3173e-02,  5.7358e-01,  6.6538e-02,  2.9113e-01,  8.6283e-02,
         6.6489e-03,  3.2261e-02,  4.3390e-02,  5.0742e-02, -1.0000e+09,
         2.9773e-01,  2.6645e-02,  8.6283e-02,  8.6060e-01,  3.2138e-01,
        -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,
         7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02,
        -1.0000e+09,  7.7928e-02,  7.7928e-02,  7.7928e-02, -2.1756e-03,
         7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02,
        -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,
         7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02,
        -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,
         7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02,
        -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,
         7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02,
        -2.1756e-03,  7.7928e-02

Tokens embeddings:
tensor([ 3.3173e-02,  5.7358e-01,  6.6538e-02,  2.9113e-01,  8.6283e-02,
         6.6489e-03,  3.2261e-02,  4.3390e-02,  5.0742e-02, -1.0000e+09,
         2.9773e-01,  2.6645e-02,  8.6283e-02,  8.6060e-01,  3.2138e-01,
        -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,
         7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02,
        -1.0000e+09,  7.7928e-02,  7.7928e-02,  7.7928e-02, -2.1756e-03,
         7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02,
        -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,
         7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02,
        -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,
         7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02,
        -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,
         7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02,
        -2.1756e-03,  7.7928e-02

Tokens embeddings:
tensor([ 3.3173e-02,  5.7358e-01,  6.6538e-02,  2.9113e-01,  8.6283e-02,
         6.6489e-03,  3.2261e-02,  4.3390e-02,  5.0742e-02, -1.0000e+09,
         2.9773e-01,  2.6645e-02,  8.6283e-02,  8.6060e-01,  3.2138e-01,
        -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,
         7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02,
        -1.0000e+09,  7.7928e-02,  7.7928e-02,  7.7928e-02, -2.1756e-03,
         7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02,
        -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,
         7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02,
        -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,
         7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02,
        -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,
         7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02,
        -2.1756e-03,  7.7928e-02

Tokens embeddings:
tensor([ 3.3173e-02,  5.7358e-01,  6.6538e-02,  2.9113e-01,  8.6283e-02,
         6.6489e-03,  3.2261e-02,  4.3390e-02,  5.0742e-02, -1.0000e+09,
         2.9773e-01,  2.6645e-02,  8.6283e-02,  8.6060e-01,  3.2138e-01,
        -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,
         7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02,
        -1.0000e+09,  7.7928e-02,  7.7928e-02,  7.7928e-02, -2.1756e-03,
         7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02,
        -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,
         7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02,
        -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,
         7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02,
        -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,
         7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02,
        -2.1756e-03,  7.7928e-02

Tokens embeddings:
tensor([ 3.3173e-02,  5.7358e-01,  6.6538e-02,  2.9113e-01,  8.6283e-02,
         6.6489e-03,  3.2261e-02,  4.3390e-02,  5.0742e-02, -1.0000e+09,
         2.9773e-01,  2.6645e-02,  8.6283e-02,  8.6060e-01,  3.2138e-01,
        -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,
         7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02,
        -1.0000e+09,  7.7928e-02,  7.7928e-02,  7.7928e-02, -2.1756e-03,
         7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02,
        -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,
         7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02,
        -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,
         7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02,
        -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,
         7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02,
        -2.1756e-03,  7.7928e-02

Tokens embeddings:
tensor([ 3.3173e-02, -5.4577e-02,  1.7344e-01,  8.4271e-02,  8.4989e-02,
         9.4834e-02,  8.5185e-02,  1.6863e-01,  1.5425e-01, -2.4761e-03,
         1.2945e-01,  2.4700e-01,  1.6521e-01, -1.5069e-02,  6.6274e-01,
         3.1585e-01,  8.2345e-02,  7.2337e-02,  4.8633e-03,  5.8823e-02,
         2.9773e-01, -1.5537e-02,  7.3201e-02,  6.0246e-01,  7.7950e-02,
         2.5623e-01,  4.2367e-01,  1.4934e-02,  8.6283e-02,  1.0056e-02,
         7.5139e-02,  1.5625e-01,  1.4351e-01, -2.7676e-02,  8.6060e-01,
         3.2138e-01,  7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,
         7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02,
         7.7928e-02,  7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,
         7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02,
        -2.1756e-03,  7.7928e-02, -1.0000e+09,  7.7928e-02,  7.7928e-02,
         7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02,
        -2.1756e-03,  7.7928e-02

Tokens embeddings:
tensor([ 3.3173e-02, -5.4577e-02,  1.7344e-01,  8.4271e-02,  8.4989e-02,
         9.4834e-02,  8.5185e-02,  1.6863e-01,  1.5425e-01, -2.4761e-03,
         1.2945e-01,  2.4700e-01,  1.6521e-01, -1.5069e-02,  6.6274e-01,
         3.1585e-01,  8.2345e-02,  7.2337e-02,  4.8633e-03,  5.8823e-02,
         2.9773e-01, -1.5537e-02,  7.3201e-02,  6.0246e-01,  7.7950e-02,
         2.5623e-01,  4.2367e-01,  1.4934e-02,  8.6283e-02,  1.0056e-02,
         7.5139e-02,  1.5625e-01,  1.4351e-01, -2.7676e-02,  8.6060e-01,
         3.2138e-01,  7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,
         7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02,
         7.7928e-02,  7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,
         7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02,
        -2.1756e-03,  7.7928e-02, -1.0000e+09,  7.7928e-02,  7.7928e-02,
         7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02,
        -2.1756e-03,  7.7928e-02

Tokens embeddings:
tensor([ 3.3173e-02, -5.4577e-02,  1.7344e-01,  8.4271e-02,  8.4989e-02,
         9.4834e-02,  8.5185e-02,  1.6863e-01,  1.5425e-01, -2.4761e-03,
         1.2945e-01,  2.4700e-01,  1.6521e-01, -1.5069e-02,  6.6274e-01,
         3.1585e-01,  8.2345e-02,  7.2337e-02,  4.8633e-03,  5.8823e-02,
         2.9773e-01, -1.5537e-02,  7.3201e-02,  6.0246e-01,  7.7950e-02,
         2.5623e-01,  4.2367e-01,  1.4934e-02,  8.6283e-02,  1.0056e-02,
         7.5139e-02,  1.5625e-01,  1.4351e-01, -2.7676e-02,  8.6060e-01,
         3.2138e-01,  7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,
         7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02,
         7.7928e-02,  7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,
         7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02,
        -2.1756e-03,  7.7928e-02, -1.0000e+09,  7.7928e-02,  7.7928e-02,
         7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02,
        -2.1756e-03,  7.7928e-02

Tokens embeddings:
tensor([ 3.3173e-02, -5.4577e-02,  1.7344e-01,  8.4271e-02,  8.4989e-02,
         9.4834e-02,  8.5185e-02,  1.6863e-01,  1.5425e-01, -2.4761e-03,
         1.2945e-01,  2.4700e-01,  1.6521e-01, -1.5069e-02,  6.6274e-01,
         3.1585e-01,  8.2345e-02,  7.2337e-02,  4.8633e-03,  5.8823e-02,
         2.9773e-01, -1.5537e-02,  7.3201e-02,  6.0246e-01,  7.7950e-02,
         2.5623e-01,  4.2367e-01,  1.4934e-02,  8.6283e-02,  1.0056e-02,
         7.5139e-02,  1.5625e-01,  1.4351e-01, -2.7676e-02,  8.6060e-01,
         3.2138e-01,  7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,
         7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02,
         7.7928e-02,  7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,
         7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02,
        -2.1756e-03,  7.7928e-02, -1.0000e+09,  7.7928e-02,  7.7928e-02,
         7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02,
        -2.1756e-03,  7.7928e-02

Tokens embeddings:
tensor([ 3.3173e-02, -5.4577e-02,  1.7344e-01,  8.4271e-02,  8.4989e-02,
         9.4834e-02,  8.5185e-02,  1.6863e-01,  1.5425e-01, -2.4761e-03,
         1.2945e-01,  2.4700e-01,  1.6521e-01, -1.5069e-02,  6.6274e-01,
         3.1585e-01,  8.2345e-02,  7.2337e-02,  4.8633e-03,  5.8823e-02,
         2.9773e-01, -1.5537e-02,  7.3201e-02,  6.0246e-01,  7.7950e-02,
         2.5623e-01,  4.2367e-01,  1.4934e-02,  8.6283e-02,  1.0056e-02,
         7.5139e-02,  1.5625e-01,  1.4351e-01, -2.7676e-02,  8.6060e-01,
         3.2138e-01,  7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,
         7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02,
         7.7928e-02,  7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,
         7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02,
        -2.1756e-03,  7.7928e-02, -1.0000e+09,  7.7928e-02,  7.7928e-02,
         7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02,
        -2.1756e-03,  7.7928e-02

Tokens embeddings:
tensor([ 3.3173e-02, -5.4577e-02,  1.7344e-01,  8.4271e-02,  8.4989e-02,
         9.4834e-02,  8.5185e-02,  1.6863e-01,  1.5425e-01, -2.4761e-03,
         1.2945e-01,  2.4700e-01,  1.6521e-01, -1.5069e-02,  6.6274e-01,
         3.1585e-01,  8.2345e-02,  7.2337e-02,  4.8633e-03,  5.8823e-02,
         2.9773e-01, -1.5537e-02,  7.3201e-02,  6.0246e-01,  7.7950e-02,
         2.5623e-01,  4.2367e-01,  1.4934e-02,  8.6283e-02,  1.0056e-02,
         7.5139e-02,  1.5625e-01,  1.4351e-01, -2.7676e-02,  8.6060e-01,
         3.2138e-01,  7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,
         7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02,
         7.7928e-02,  7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,
         7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02,
        -2.1756e-03,  7.7928e-02, -1.0000e+09,  7.7928e-02,  7.7928e-02,
         7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02,
        -2.1756e-03,  7.7928e-02

Tokens embeddings:
tensor([ 3.3173e-02, -5.4577e-02,  1.7344e-01,  8.4271e-02,  8.4989e-02,
         9.4834e-02,  8.5185e-02,  1.6863e-01,  1.5425e-01, -2.4761e-03,
         1.2945e-01,  2.4700e-01,  1.6521e-01, -1.5069e-02,  6.6274e-01,
         3.1585e-01,  8.2345e-02,  7.2337e-02,  4.8633e-03,  5.8823e-02,
         2.9773e-01, -1.5537e-02,  7.3201e-02,  6.0246e-01,  7.7950e-02,
         2.5623e-01,  4.2367e-01,  1.4934e-02,  8.6283e-02,  1.0056e-02,
         7.5139e-02,  1.5625e-01,  1.4351e-01, -2.7676e-02,  8.6060e-01,
         3.2138e-01,  7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,
         7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02,
         7.7928e-02,  7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,
         7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02,
        -2.1756e-03,  7.7928e-02, -1.0000e+09,  7.7928e-02,  7.7928e-02,
         7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02,
        -2.1756e-03,  7.7928e-02

Tokens embeddings:
tensor([ 3.3173e-02, -5.4577e-02,  1.7344e-01,  8.4271e-02,  8.4989e-02,
         9.4834e-02,  8.5185e-02,  1.6863e-01,  1.5425e-01, -2.4761e-03,
         1.2945e-01,  2.4700e-01,  1.6521e-01, -1.5069e-02,  6.6274e-01,
         3.1585e-01,  8.2345e-02,  7.2337e-02,  4.8633e-03,  5.8823e-02,
         2.9773e-01, -1.5537e-02,  7.3201e-02,  6.0246e-01,  7.7950e-02,
         2.5623e-01,  4.2367e-01,  1.4934e-02,  8.6283e-02,  1.0056e-02,
         7.5139e-02,  1.5625e-01,  1.4351e-01, -2.7676e-02,  8.6060e-01,
         3.2138e-01,  7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,
         7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02,
         7.7928e-02,  7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,
         7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02,
        -2.1756e-03,  7.7928e-02, -1.0000e+09,  7.7928e-02,  7.7928e-02,
         7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02,
        -2.1756e-03,  7.7928e-02

Tokens embeddings:
tensor([ 3.3173e-02, -5.4577e-02,  1.7344e-01,  8.4271e-02,  8.4989e-02,
         9.4834e-02,  8.5185e-02,  1.6863e-01,  1.5425e-01, -2.4761e-03,
         1.2945e-01,  2.4700e-01,  1.6521e-01, -1.5069e-02,  6.6274e-01,
         3.1585e-01,  8.2345e-02,  7.2337e-02,  4.8633e-03,  5.8823e-02,
         2.9773e-01, -1.5537e-02,  7.3201e-02,  6.0246e-01,  7.7950e-02,
         2.5623e-01,  4.2367e-01,  1.4934e-02,  8.6283e-02,  1.0056e-02,
         7.5139e-02,  1.5625e-01,  1.4351e-01, -2.7676e-02,  8.6060e-01,
         3.2138e-01,  7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,
         7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02,
         7.7928e-02,  7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,
         7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02,
        -2.1756e-03,  7.7928e-02, -1.0000e+09,  7.7928e-02,  7.7928e-02,
         7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02,
        -2.1756e-03,  7.7928e-02

Tokens embeddings:
tensor([ 3.3173e-02, -5.4577e-02,  1.7344e-01,  8.4271e-02,  8.4989e-02,
         9.4834e-02,  8.5185e-02,  1.6863e-01,  1.5425e-01, -2.4761e-03,
         1.2945e-01,  2.4700e-01,  1.6521e-01, -1.5069e-02,  6.6274e-01,
         3.1585e-01,  8.2345e-02,  7.2337e-02,  4.8633e-03,  5.8823e-02,
         2.9773e-01, -1.5537e-02,  7.3201e-02,  6.0246e-01,  7.7950e-02,
         2.5623e-01,  4.2367e-01,  1.4934e-02,  8.6283e-02,  1.0056e-02,
         7.5139e-02,  1.5625e-01,  1.4351e-01, -2.7676e-02,  8.6060e-01,
         3.2138e-01,  7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,
         7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02,
         7.7928e-02,  7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,
         7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02,
        -2.1756e-03,  7.7928e-02, -1.0000e+09,  7.7928e-02,  7.7928e-02,
         7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02,
        -2.1756e-03,  7.7928e-02

Tokens embeddings:
tensor([ 3.3173e-02, -5.4577e-02,  1.7344e-01,  8.4271e-02,  8.4989e-02,
         9.4834e-02,  8.5185e-02,  1.6863e-01,  1.5425e-01, -2.4761e-03,
         1.2945e-01,  2.4700e-01,  1.6521e-01, -1.5069e-02,  6.6274e-01,
         3.1585e-01,  8.2345e-02,  7.2337e-02,  4.8633e-03,  5.8823e-02,
         2.9773e-01, -1.5537e-02,  7.3201e-02,  6.0246e-01,  7.7950e-02,
         2.5623e-01,  4.2367e-01,  1.4934e-02,  8.6283e-02,  1.0056e-02,
         7.5139e-02,  1.5625e-01,  1.4351e-01, -2.7676e-02,  8.6060e-01,
         3.2138e-01,  7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,
         7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02,
         7.7928e-02,  7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,
         7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02,
        -2.1756e-03,  7.7928e-02, -1.0000e+09,  7.7928e-02,  7.7928e-02,
         7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02,
        -2.1756e-03,  7.7928e-02

Tokens embeddings:
tensor([ 3.3173e-02, -5.4577e-02,  1.7344e-01,  8.4271e-02,  8.4989e-02,
         9.4834e-02,  8.5185e-02,  1.6863e-01,  1.5425e-01, -2.4761e-03,
         1.2945e-01,  2.4700e-01,  1.6521e-01, -1.5069e-02,  6.6274e-01,
         3.1585e-01,  8.2345e-02,  7.2337e-02,  4.8633e-03,  5.8823e-02,
         2.9773e-01, -1.5537e-02,  7.3201e-02,  6.0246e-01,  7.7950e-02,
         2.5623e-01,  4.2367e-01,  1.4934e-02,  8.6283e-02,  1.0056e-02,
         7.5139e-02,  1.5625e-01,  1.4351e-01, -2.7676e-02,  8.6060e-01,
         3.2138e-01,  7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,
         7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02,
         7.7928e-02,  7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,
         7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02,
        -2.1756e-03,  7.7928e-02, -1.0000e+09,  7.7928e-02,  7.7928e-02,
         7.7928e-02, -2.1756e-03,  7.7928e-02, -2.1756e-03,  7.7928e-02,
        -2.1756e-03,  7.7928e-02

In [28]:
# Max Pooling for all tokens in sentences and argmax

# Select some sentences randomly
sent_index = [21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51,52,53,54,55,56,57,58,59,60,61,62,63,64]

for s in sent_index:
  print("*" * 100)
  # Get the sentence's words
  tokens = test_inputs[s]
  # For each layer...
  for l in range(12):
    print("\nLayer", l+1)
    attention = np.squeeze(test_attentions[s][l].detach().cpu().numpy(), axis=0)
    # and for each head
    #print("Head", h+1)
    # Get the attention for the cls token
    encoded_tokens = bert_tokenizer(tokens, truncation=True, padding=True, max_length=128, return_tensors='pt')
    encoded_tokens = encoded_tokens.to(device)
    with torch.no_grad():
      model_output1 = model_e(**encoded_tokens)
      tokens_embeddings = max_pooling(model_output1, encoded_tokens['attention_mask'])
      tokens_embeddings = tokens_embeddings.cpu()
      cls_attentions = head[0]
      display(HTML(colorize(tokens, cls_attentions)))
      print("Tokens embeddings:")
      print(tokens_embeddings)
      arg = argmax(tokens_embeddings)
      print('arg max of %s: %d' % (tokens_embeddings, arg))

Output hidden; open in https://colab.research.google.com to view.

In [29]:
# Pooling First token [CLS] for each sentence - argmax

# Select some sentences randomly
sent_index = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10]

for s in sent_index:
  print("*" * 100)
  # Get the sentence's words
  tokens = test_inputs[s]
  # For each layer...
  for l in range(12):
    print("\nLayer", l+1)
    attention = np.squeeze(test_attentions[s][l].detach().cpu().numpy(), axis=0)
    # and for each head
    for h, head in enumerate(attention):
      print("Head", h+1)
      # Get the attention for the cls token
      encoded_tokens = bert_tokenizer(tokens, padding=True, truncation=True, max_length=128, return_tensors='pt')
      encoded_tokens = encoded_tokens.to(device)
      with torch.no_grad():
        model_output1 = model_e(**encoded_tokens)
        tokens_embeddings = model_output1[0][:,0]
        tokens_embeddings = tokens_embeddings.cpu()
        cls_attentions = head[0]
        display(HTML(colorize(tokens, cls_attentions)))
        print("Tokens embeddings:")
        print(tokens_embeddings)
        arg = argmax(tokens_embeddings)
        print('arg max of %s: %d' % (tokens_embeddings, arg))

Output hidden; open in https://colab.research.google.com to view.

In [30]:
# Pooling First token [CLS] for each sentence - argmax

# Select some sentences randomly
sent_index = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10]

for s in sent_index:
  print("*" * 100)
  # Get the sentence's words
  tokens = test_inputs[s]
  # For each layer...
  for l in range(12):
    print("\nLayer", l+1)
    attention = np.squeeze(test_attentions[s][l].detach().cpu().numpy(), axis=0)
    # and for each head
    #for h, head in enumerate(attention):
    #print("Head", h+1)
    # Get the attention for the cls token
    encoded_tokens = bert_tokenizer(tokens, padding=True, truncation=True, max_length=128, return_tensors='pt')
    encoded_tokens = encoded_tokens.to(device)
    with torch.no_grad():
      model_output1 = model_e(**encoded_tokens)
      tokens_embeddings = model_output1[0][:,0]
      tokens_embeddings = tokens_embeddings.cpu()
      cls_attentions = head[0]
      display(HTML(colorize(tokens, cls_attentions)))
      print("Tokens embeddings:")
      print(tokens_embeddings)
      arg = argmax(tokens_embeddings)
      print('arg max of %s: %d' % (tokens_embeddings, arg))

****************************************************************************************************

Layer 1


Tokens embeddings:
tensor([ 0.0274,  0.0132,  0.2004, -0.0137, -0.0022,  1.1194,  0.2179,  0.3344,
         0.4760,  0.6855,  0.0358,  0.1708, -0.0131, -0.0767, -0.0202, -0.0022,
         1.1194, -0.0212,  0.2720,  0.0303, -0.0185,  0.1201, -0.0277,  0.0101,
         0.2004, -0.0185, -0.0022,  0.0525,  0.0908,  0.6025,  0.4304,  0.1294,
         0.0525,  0.2662,  0.2977,  0.6225, -0.0277,  0.8606,  0.3214, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022])
arg max of tensor([ 0.0274,  0.0132,  0.2004, -

Tokens embeddings:
tensor([ 0.0274,  0.0132,  0.2004, -0.0137, -0.0022,  1.1194,  0.2179,  0.3344,
         0.4760,  0.6855,  0.0358,  0.1708, -0.0131, -0.0767, -0.0202, -0.0022,
         1.1194, -0.0212,  0.2720,  0.0303, -0.0185,  0.1201, -0.0277,  0.0101,
         0.2004, -0.0185, -0.0022,  0.0525,  0.0908,  0.6025,  0.4304,  0.1294,
         0.0525,  0.2662,  0.2977,  0.6225, -0.0277,  0.8606,  0.3214, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022])
arg max of tensor([ 0.0274,  0.0132,  0.2004, -

Tokens embeddings:
tensor([ 0.0274,  0.0132,  0.2004, -0.0137, -0.0022,  1.1194,  0.2179,  0.3344,
         0.4760,  0.6855,  0.0358,  0.1708, -0.0131, -0.0767, -0.0202, -0.0022,
         1.1194, -0.0212,  0.2720,  0.0303, -0.0185,  0.1201, -0.0277,  0.0101,
         0.2004, -0.0185, -0.0022,  0.0525,  0.0908,  0.6025,  0.4304,  0.1294,
         0.0525,  0.2662,  0.2977,  0.6225, -0.0277,  0.8606,  0.3214, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022])
arg max of tensor([ 0.0274,  0.0132,  0.2004, -

Tokens embeddings:
tensor([ 0.0274,  0.0132,  0.2004, -0.0137, -0.0022,  1.1194,  0.2179,  0.3344,
         0.4760,  0.6855,  0.0358,  0.1708, -0.0131, -0.0767, -0.0202, -0.0022,
         1.1194, -0.0212,  0.2720,  0.0303, -0.0185,  0.1201, -0.0277,  0.0101,
         0.2004, -0.0185, -0.0022,  0.0525,  0.0908,  0.6025,  0.4304,  0.1294,
         0.0525,  0.2662,  0.2977,  0.6225, -0.0277,  0.8606,  0.3214, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022])
arg max of tensor([ 0.0274,  0.0132,  0.2004, -

Tokens embeddings:
tensor([ 0.0274,  0.0132,  0.2004, -0.0137, -0.0022,  1.1194,  0.2179,  0.3344,
         0.4760,  0.6855,  0.0358,  0.1708, -0.0131, -0.0767, -0.0202, -0.0022,
         1.1194, -0.0212,  0.2720,  0.0303, -0.0185,  0.1201, -0.0277,  0.0101,
         0.2004, -0.0185, -0.0022,  0.0525,  0.0908,  0.6025,  0.4304,  0.1294,
         0.0525,  0.2662,  0.2977,  0.6225, -0.0277,  0.8606,  0.3214, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022])
arg max of tensor([ 0.0274,  0.0132,  0.2004, -

Tokens embeddings:
tensor([ 0.0274,  0.0132,  0.2004, -0.0137, -0.0022,  1.1194,  0.2179,  0.3344,
         0.4760,  0.6855,  0.0358,  0.1708, -0.0131, -0.0767, -0.0202, -0.0022,
         1.1194, -0.0212,  0.2720,  0.0303, -0.0185,  0.1201, -0.0277,  0.0101,
         0.2004, -0.0185, -0.0022,  0.0525,  0.0908,  0.6025,  0.4304,  0.1294,
         0.0525,  0.2662,  0.2977,  0.6225, -0.0277,  0.8606,  0.3214, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022])
arg max of tensor([ 0.0274,  0.0132,  0.2004, -

Tokens embeddings:
tensor([ 0.0274,  0.0132,  0.2004, -0.0137, -0.0022,  1.1194,  0.2179,  0.3344,
         0.4760,  0.6855,  0.0358,  0.1708, -0.0131, -0.0767, -0.0202, -0.0022,
         1.1194, -0.0212,  0.2720,  0.0303, -0.0185,  0.1201, -0.0277,  0.0101,
         0.2004, -0.0185, -0.0022,  0.0525,  0.0908,  0.6025,  0.4304,  0.1294,
         0.0525,  0.2662,  0.2977,  0.6225, -0.0277,  0.8606,  0.3214, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022])
arg max of tensor([ 0.0274,  0.0132,  0.2004, -

Tokens embeddings:
tensor([ 0.0274,  0.0132,  0.2004, -0.0137, -0.0022,  1.1194,  0.2179,  0.3344,
         0.4760,  0.6855,  0.0358,  0.1708, -0.0131, -0.0767, -0.0202, -0.0022,
         1.1194, -0.0212,  0.2720,  0.0303, -0.0185,  0.1201, -0.0277,  0.0101,
         0.2004, -0.0185, -0.0022,  0.0525,  0.0908,  0.6025,  0.4304,  0.1294,
         0.0525,  0.2662,  0.2977,  0.6225, -0.0277,  0.8606,  0.3214, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022])
arg max of tensor([ 0.0274,  0.0132,  0.2004, -

Tokens embeddings:
tensor([ 0.0274,  0.0132,  0.2004, -0.0137, -0.0022,  1.1194,  0.2179,  0.3344,
         0.4760,  0.6855,  0.0358,  0.1708, -0.0131, -0.0767, -0.0202, -0.0022,
         1.1194, -0.0212,  0.2720,  0.0303, -0.0185,  0.1201, -0.0277,  0.0101,
         0.2004, -0.0185, -0.0022,  0.0525,  0.0908,  0.6025,  0.4304,  0.1294,
         0.0525,  0.2662,  0.2977,  0.6225, -0.0277,  0.8606,  0.3214, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022])
arg max of tensor([ 0.0274,  0.0132,  0.2004, -

Tokens embeddings:
tensor([ 0.0274,  0.0132,  0.2004, -0.0137, -0.0022,  1.1194,  0.2179,  0.3344,
         0.4760,  0.6855,  0.0358,  0.1708, -0.0131, -0.0767, -0.0202, -0.0022,
         1.1194, -0.0212,  0.2720,  0.0303, -0.0185,  0.1201, -0.0277,  0.0101,
         0.2004, -0.0185, -0.0022,  0.0525,  0.0908,  0.6025,  0.4304,  0.1294,
         0.0525,  0.2662,  0.2977,  0.6225, -0.0277,  0.8606,  0.3214, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022])
arg max of tensor([ 0.0274,  0.0132,  0.2004, -

Tokens embeddings:
tensor([ 0.0274,  0.0132,  0.2004, -0.0137, -0.0022,  1.1194,  0.2179,  0.3344,
         0.4760,  0.6855,  0.0358,  0.1708, -0.0131, -0.0767, -0.0202, -0.0022,
         1.1194, -0.0212,  0.2720,  0.0303, -0.0185,  0.1201, -0.0277,  0.0101,
         0.2004, -0.0185, -0.0022,  0.0525,  0.0908,  0.6025,  0.4304,  0.1294,
         0.0525,  0.2662,  0.2977,  0.6225, -0.0277,  0.8606,  0.3214, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022])
arg max of tensor([ 0.0274,  0.0132,  0.2004, -

Tokens embeddings:
tensor([ 0.0274,  0.0132,  0.2004, -0.0137, -0.0022,  1.1194,  0.2179,  0.3344,
         0.4760,  0.6855,  0.0358,  0.1708, -0.0131, -0.0767, -0.0202, -0.0022,
         1.1194, -0.0212,  0.2720,  0.0303, -0.0185,  0.1201, -0.0277,  0.0101,
         0.2004, -0.0185, -0.0022,  0.0525,  0.0908,  0.6025,  0.4304,  0.1294,
         0.0525,  0.2662,  0.2977,  0.6225, -0.0277,  0.8606,  0.3214, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022])
arg max of tensor([ 0.0274,  0.0132,  0.2004, -

Tokens embeddings:
tensor([ 0.0274,  0.0288,  0.0121,  0.1084,  0.0510,  0.0999,  0.4418,  0.2470,
        -0.0179, -0.0057,  0.6025,  0.2082, -0.0119,  0.3142,  0.0999,  0.2720,
         0.2470,  0.0362,  0.0520,  0.0948,  0.0100, -0.0277,  0.8606,  0.3214,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022])
arg max of tensor([ 0.0274,  0.0288,  0.0121,  

Tokens embeddings:
tensor([ 0.0274,  0.0288,  0.0121,  0.1084,  0.0510,  0.0999,  0.4418,  0.2470,
        -0.0179, -0.0057,  0.6025,  0.2082, -0.0119,  0.3142,  0.0999,  0.2720,
         0.2470,  0.0362,  0.0520,  0.0948,  0.0100, -0.0277,  0.8606,  0.3214,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022])
arg max of tensor([ 0.0274,  0.0288,  0.0121,  

Tokens embeddings:
tensor([ 0.0274,  0.0288,  0.0121,  0.1084,  0.0510,  0.0999,  0.4418,  0.2470,
        -0.0179, -0.0057,  0.6025,  0.2082, -0.0119,  0.3142,  0.0999,  0.2720,
         0.2470,  0.0362,  0.0520,  0.0948,  0.0100, -0.0277,  0.8606,  0.3214,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022])
arg max of tensor([ 0.0274,  0.0288,  0.0121,  

Tokens embeddings:
tensor([ 0.0274,  0.0288,  0.0121,  0.1084,  0.0510,  0.0999,  0.4418,  0.2470,
        -0.0179, -0.0057,  0.6025,  0.2082, -0.0119,  0.3142,  0.0999,  0.2720,
         0.2470,  0.0362,  0.0520,  0.0948,  0.0100, -0.0277,  0.8606,  0.3214,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022])
arg max of tensor([ 0.0274,  0.0288,  0.0121,  

Tokens embeddings:
tensor([ 0.0274,  0.0288,  0.0121,  0.1084,  0.0510,  0.0999,  0.4418,  0.2470,
        -0.0179, -0.0057,  0.6025,  0.2082, -0.0119,  0.3142,  0.0999,  0.2720,
         0.2470,  0.0362,  0.0520,  0.0948,  0.0100, -0.0277,  0.8606,  0.3214,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022])
arg max of tensor([ 0.0274,  0.0288,  0.0121,  

Tokens embeddings:
tensor([ 0.0274,  0.0288,  0.0121,  0.1084,  0.0510,  0.0999,  0.4418,  0.2470,
        -0.0179, -0.0057,  0.6025,  0.2082, -0.0119,  0.3142,  0.0999,  0.2720,
         0.2470,  0.0362,  0.0520,  0.0948,  0.0100, -0.0277,  0.8606,  0.3214,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022])
arg max of tensor([ 0.0274,  0.0288,  0.0121,  

Tokens embeddings:
tensor([ 0.0274,  0.0288,  0.0121,  0.1084,  0.0510,  0.0999,  0.4418,  0.2470,
        -0.0179, -0.0057,  0.6025,  0.2082, -0.0119,  0.3142,  0.0999,  0.2720,
         0.2470,  0.0362,  0.0520,  0.0948,  0.0100, -0.0277,  0.8606,  0.3214,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022])
arg max of tensor([ 0.0274,  0.0288,  0.0121,  

Tokens embeddings:
tensor([ 0.0274,  0.0288,  0.0121,  0.1084,  0.0510,  0.0999,  0.4418,  0.2470,
        -0.0179, -0.0057,  0.6025,  0.2082, -0.0119,  0.3142,  0.0999,  0.2720,
         0.2470,  0.0362,  0.0520,  0.0948,  0.0100, -0.0277,  0.8606,  0.3214,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022])
arg max of tensor([ 0.0274,  0.0288,  0.0121,  

Tokens embeddings:
tensor([ 0.0274,  0.0288,  0.0121,  0.1084,  0.0510,  0.0999,  0.4418,  0.2470,
        -0.0179, -0.0057,  0.6025,  0.2082, -0.0119,  0.3142,  0.0999,  0.2720,
         0.2470,  0.0362,  0.0520,  0.0948,  0.0100, -0.0277,  0.8606,  0.3214,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022])
arg max of tensor([ 0.0274,  0.0288,  0.0121,  

Tokens embeddings:
tensor([ 0.0274,  0.0288,  0.0121,  0.1084,  0.0510,  0.0999,  0.4418,  0.2470,
        -0.0179, -0.0057,  0.6025,  0.2082, -0.0119,  0.3142,  0.0999,  0.2720,
         0.2470,  0.0362,  0.0520,  0.0948,  0.0100, -0.0277,  0.8606,  0.3214,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022])
arg max of tensor([ 0.0274,  0.0288,  0.0121,  

Tokens embeddings:
tensor([ 0.0274,  0.0288,  0.0121,  0.1084,  0.0510,  0.0999,  0.4418,  0.2470,
        -0.0179, -0.0057,  0.6025,  0.2082, -0.0119,  0.3142,  0.0999,  0.2720,
         0.2470,  0.0362,  0.0520,  0.0948,  0.0100, -0.0277,  0.8606,  0.3214,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022])
arg max of tensor([ 0.0274,  0.0288,  0.0121,  

Tokens embeddings:
tensor([ 0.0274,  0.0288,  0.0121,  0.1084,  0.0510,  0.0999,  0.4418,  0.2470,
        -0.0179, -0.0057,  0.6025,  0.2082, -0.0119,  0.3142,  0.0999,  0.2720,
         0.2470,  0.0362,  0.0520,  0.0948,  0.0100, -0.0277,  0.8606,  0.3214,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022])
arg max of tensor([ 0.0274,  0.0288,  0.0121,  

Tokens embeddings:
tensor([ 0.0274, -0.1941, -0.0681, -0.0141,  0.0999,  0.0948,  0.2977,  0.0149,
        -0.0019, -0.0075,  0.0604, -0.0277,  0.0101,  0.1780,  0.5148,  0.1354,
        -0.0124,  0.6025,  0.0948,  0.0406, -0.0205,  0.1676,  0.6722, -0.0277,
         0.0101,  0.0684,  0.2994, -0.0025,  0.0588,  0.8781, -0.0212,  0.4809,
        -0.0277,  0.1108,  0.6025,  0.9649,  0.0098,  0.4809,  0.0324, -0.0180,
         0.1357, -0.0277,  0.8606,  0.3214, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022])
arg max of tensor([ 0.0274, -0.1941, -0.0681, -

Tokens embeddings:
tensor([ 0.0274, -0.1941, -0.0681, -0.0141,  0.0999,  0.0948,  0.2977,  0.0149,
        -0.0019, -0.0075,  0.0604, -0.0277,  0.0101,  0.1780,  0.5148,  0.1354,
        -0.0124,  0.6025,  0.0948,  0.0406, -0.0205,  0.1676,  0.6722, -0.0277,
         0.0101,  0.0684,  0.2994, -0.0025,  0.0588,  0.8781, -0.0212,  0.4809,
        -0.0277,  0.1108,  0.6025,  0.9649,  0.0098,  0.4809,  0.0324, -0.0180,
         0.1357, -0.0277,  0.8606,  0.3214, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022])
arg max of tensor([ 0.0274, -0.1941, -0.0681, -

Tokens embeddings:
tensor([ 0.0274, -0.1941, -0.0681, -0.0141,  0.0999,  0.0948,  0.2977,  0.0149,
        -0.0019, -0.0075,  0.0604, -0.0277,  0.0101,  0.1780,  0.5148,  0.1354,
        -0.0124,  0.6025,  0.0948,  0.0406, -0.0205,  0.1676,  0.6722, -0.0277,
         0.0101,  0.0684,  0.2994, -0.0025,  0.0588,  0.8781, -0.0212,  0.4809,
        -0.0277,  0.1108,  0.6025,  0.9649,  0.0098,  0.4809,  0.0324, -0.0180,
         0.1357, -0.0277,  0.8606,  0.3214, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022])
arg max of tensor([ 0.0274, -0.1941, -0.0681, -

Tokens embeddings:
tensor([ 0.0274, -0.1941, -0.0681, -0.0141,  0.0999,  0.0948,  0.2977,  0.0149,
        -0.0019, -0.0075,  0.0604, -0.0277,  0.0101,  0.1780,  0.5148,  0.1354,
        -0.0124,  0.6025,  0.0948,  0.0406, -0.0205,  0.1676,  0.6722, -0.0277,
         0.0101,  0.0684,  0.2994, -0.0025,  0.0588,  0.8781, -0.0212,  0.4809,
        -0.0277,  0.1108,  0.6025,  0.9649,  0.0098,  0.4809,  0.0324, -0.0180,
         0.1357, -0.0277,  0.8606,  0.3214, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022])
arg max of tensor([ 0.0274, -0.1941, -0.0681, -

Tokens embeddings:
tensor([ 0.0274, -0.1941, -0.0681, -0.0141,  0.0999,  0.0948,  0.2977,  0.0149,
        -0.0019, -0.0075,  0.0604, -0.0277,  0.0101,  0.1780,  0.5148,  0.1354,
        -0.0124,  0.6025,  0.0948,  0.0406, -0.0205,  0.1676,  0.6722, -0.0277,
         0.0101,  0.0684,  0.2994, -0.0025,  0.0588,  0.8781, -0.0212,  0.4809,
        -0.0277,  0.1108,  0.6025,  0.9649,  0.0098,  0.4809,  0.0324, -0.0180,
         0.1357, -0.0277,  0.8606,  0.3214, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022])
arg max of tensor([ 0.0274, -0.1941, -0.0681, -

Tokens embeddings:
tensor([ 0.0274, -0.1941, -0.0681, -0.0141,  0.0999,  0.0948,  0.2977,  0.0149,
        -0.0019, -0.0075,  0.0604, -0.0277,  0.0101,  0.1780,  0.5148,  0.1354,
        -0.0124,  0.6025,  0.0948,  0.0406, -0.0205,  0.1676,  0.6722, -0.0277,
         0.0101,  0.0684,  0.2994, -0.0025,  0.0588,  0.8781, -0.0212,  0.4809,
        -0.0277,  0.1108,  0.6025,  0.9649,  0.0098,  0.4809,  0.0324, -0.0180,
         0.1357, -0.0277,  0.8606,  0.3214, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022])
arg max of tensor([ 0.0274, -0.1941, -0.0681, -

Tokens embeddings:
tensor([ 0.0274, -0.1941, -0.0681, -0.0141,  0.0999,  0.0948,  0.2977,  0.0149,
        -0.0019, -0.0075,  0.0604, -0.0277,  0.0101,  0.1780,  0.5148,  0.1354,
        -0.0124,  0.6025,  0.0948,  0.0406, -0.0205,  0.1676,  0.6722, -0.0277,
         0.0101,  0.0684,  0.2994, -0.0025,  0.0588,  0.8781, -0.0212,  0.4809,
        -0.0277,  0.1108,  0.6025,  0.9649,  0.0098,  0.4809,  0.0324, -0.0180,
         0.1357, -0.0277,  0.8606,  0.3214, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022])
arg max of tensor([ 0.0274, -0.1941, -0.0681, -

Tokens embeddings:
tensor([ 0.0274, -0.1941, -0.0681, -0.0141,  0.0999,  0.0948,  0.2977,  0.0149,
        -0.0019, -0.0075,  0.0604, -0.0277,  0.0101,  0.1780,  0.5148,  0.1354,
        -0.0124,  0.6025,  0.0948,  0.0406, -0.0205,  0.1676,  0.6722, -0.0277,
         0.0101,  0.0684,  0.2994, -0.0025,  0.0588,  0.8781, -0.0212,  0.4809,
        -0.0277,  0.1108,  0.6025,  0.9649,  0.0098,  0.4809,  0.0324, -0.0180,
         0.1357, -0.0277,  0.8606,  0.3214, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022])
arg max of tensor([ 0.0274, -0.1941, -0.0681, -

Tokens embeddings:
tensor([ 0.0274, -0.1941, -0.0681, -0.0141,  0.0999,  0.0948,  0.2977,  0.0149,
        -0.0019, -0.0075,  0.0604, -0.0277,  0.0101,  0.1780,  0.5148,  0.1354,
        -0.0124,  0.6025,  0.0948,  0.0406, -0.0205,  0.1676,  0.6722, -0.0277,
         0.0101,  0.0684,  0.2994, -0.0025,  0.0588,  0.8781, -0.0212,  0.4809,
        -0.0277,  0.1108,  0.6025,  0.9649,  0.0098,  0.4809,  0.0324, -0.0180,
         0.1357, -0.0277,  0.8606,  0.3214, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022])
arg max of tensor([ 0.0274, -0.1941, -0.0681, -

Tokens embeddings:
tensor([ 0.0274, -0.1941, -0.0681, -0.0141,  0.0999,  0.0948,  0.2977,  0.0149,
        -0.0019, -0.0075,  0.0604, -0.0277,  0.0101,  0.1780,  0.5148,  0.1354,
        -0.0124,  0.6025,  0.0948,  0.0406, -0.0205,  0.1676,  0.6722, -0.0277,
         0.0101,  0.0684,  0.2994, -0.0025,  0.0588,  0.8781, -0.0212,  0.4809,
        -0.0277,  0.1108,  0.6025,  0.9649,  0.0098,  0.4809,  0.0324, -0.0180,
         0.1357, -0.0277,  0.8606,  0.3214, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022])
arg max of tensor([ 0.0274, -0.1941, -0.0681, -

Tokens embeddings:
tensor([ 0.0274, -0.1941, -0.0681, -0.0141,  0.0999,  0.0948,  0.2977,  0.0149,
        -0.0019, -0.0075,  0.0604, -0.0277,  0.0101,  0.1780,  0.5148,  0.1354,
        -0.0124,  0.6025,  0.0948,  0.0406, -0.0205,  0.1676,  0.6722, -0.0277,
         0.0101,  0.0684,  0.2994, -0.0025,  0.0588,  0.8781, -0.0212,  0.4809,
        -0.0277,  0.1108,  0.6025,  0.9649,  0.0098,  0.4809,  0.0324, -0.0180,
         0.1357, -0.0277,  0.8606,  0.3214, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022])
arg max of tensor([ 0.0274, -0.1941, -0.0681, -

Tokens embeddings:
tensor([ 0.0274, -0.1941, -0.0681, -0.0141,  0.0999,  0.0948,  0.2977,  0.0149,
        -0.0019, -0.0075,  0.0604, -0.0277,  0.0101,  0.1780,  0.5148,  0.1354,
        -0.0124,  0.6025,  0.0948,  0.0406, -0.0205,  0.1676,  0.6722, -0.0277,
         0.0101,  0.0684,  0.2994, -0.0025,  0.0588,  0.8781, -0.0212,  0.4809,
        -0.0277,  0.1108,  0.6025,  0.9649,  0.0098,  0.4809,  0.0324, -0.0180,
         0.1357, -0.0277,  0.8606,  0.3214, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022])
arg max of tensor([ 0.0274, -0.1941, -0.0681, -

Tokens embeddings:
tensor([ 0.0274,  0.3983,  0.0442,  1.1194,  0.2977,  0.3068,  0.6105, -0.0045,
         0.5936,  0.1539, -0.0163,  0.0247,  0.2662,  0.4085,  0.2977, -0.0405,
         0.2658,  0.0886, -0.0040,  0.0189,  0.4237,  0.0020, -0.0277,  0.0101,
         0.1435,  0.0372, -0.1131, -0.0277,  0.0101,  0.2178,  0.1036,  0.4498,
         0.2977, -0.0771, -0.0044, -0.0837,  1.3231,  0.6025,  0.5037,  0.4498,
         0.2655, -0.0590, -0.0278, -0.0219, -0.0215, -0.0101, -0.1662, -0.0277,
         0.8606,  0.3214, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022])
arg max of tensor([ 0.0274,  0.3983,  0.0442,  

Tokens embeddings:
tensor([ 0.0274,  0.3983,  0.0442,  1.1194,  0.2977,  0.3068,  0.6105, -0.0045,
         0.5936,  0.1539, -0.0163,  0.0247,  0.2662,  0.4085,  0.2977, -0.0405,
         0.2658,  0.0886, -0.0040,  0.0189,  0.4237,  0.0020, -0.0277,  0.0101,
         0.1435,  0.0372, -0.1131, -0.0277,  0.0101,  0.2178,  0.1036,  0.4498,
         0.2977, -0.0771, -0.0044, -0.0837,  1.3231,  0.6025,  0.5037,  0.4498,
         0.2655, -0.0590, -0.0278, -0.0219, -0.0215, -0.0101, -0.1662, -0.0277,
         0.8606,  0.3214, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022])
arg max of tensor([ 0.0274,  0.3983,  0.0442,  

Tokens embeddings:
tensor([ 0.0274,  0.3983,  0.0442,  1.1194,  0.2977,  0.3068,  0.6105, -0.0045,
         0.5936,  0.1539, -0.0163,  0.0247,  0.2662,  0.4085,  0.2977, -0.0405,
         0.2658,  0.0886, -0.0040,  0.0189,  0.4237,  0.0020, -0.0277,  0.0101,
         0.1435,  0.0372, -0.1131, -0.0277,  0.0101,  0.2178,  0.1036,  0.4498,
         0.2977, -0.0771, -0.0044, -0.0837,  1.3231,  0.6025,  0.5037,  0.4498,
         0.2655, -0.0590, -0.0278, -0.0219, -0.0215, -0.0101, -0.1662, -0.0277,
         0.8606,  0.3214, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022])
arg max of tensor([ 0.0274,  0.3983,  0.0442,  

Tokens embeddings:
tensor([ 0.0274,  0.3983,  0.0442,  1.1194,  0.2977,  0.3068,  0.6105, -0.0045,
         0.5936,  0.1539, -0.0163,  0.0247,  0.2662,  0.4085,  0.2977, -0.0405,
         0.2658,  0.0886, -0.0040,  0.0189,  0.4237,  0.0020, -0.0277,  0.0101,
         0.1435,  0.0372, -0.1131, -0.0277,  0.0101,  0.2178,  0.1036,  0.4498,
         0.2977, -0.0771, -0.0044, -0.0837,  1.3231,  0.6025,  0.5037,  0.4498,
         0.2655, -0.0590, -0.0278, -0.0219, -0.0215, -0.0101, -0.1662, -0.0277,
         0.8606,  0.3214, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022])
arg max of tensor([ 0.0274,  0.3983,  0.0442,  

Tokens embeddings:
tensor([ 0.0274,  0.3983,  0.0442,  1.1194,  0.2977,  0.3068,  0.6105, -0.0045,
         0.5936,  0.1539, -0.0163,  0.0247,  0.2662,  0.4085,  0.2977, -0.0405,
         0.2658,  0.0886, -0.0040,  0.0189,  0.4237,  0.0020, -0.0277,  0.0101,
         0.1435,  0.0372, -0.1131, -0.0277,  0.0101,  0.2178,  0.1036,  0.4498,
         0.2977, -0.0771, -0.0044, -0.0837,  1.3231,  0.6025,  0.5037,  0.4498,
         0.2655, -0.0590, -0.0278, -0.0219, -0.0215, -0.0101, -0.1662, -0.0277,
         0.8606,  0.3214, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022])
arg max of tensor([ 0.0274,  0.3983,  0.0442,  

Tokens embeddings:
tensor([ 0.0274,  0.3983,  0.0442,  1.1194,  0.2977,  0.3068,  0.6105, -0.0045,
         0.5936,  0.1539, -0.0163,  0.0247,  0.2662,  0.4085,  0.2977, -0.0405,
         0.2658,  0.0886, -0.0040,  0.0189,  0.4237,  0.0020, -0.0277,  0.0101,
         0.1435,  0.0372, -0.1131, -0.0277,  0.0101,  0.2178,  0.1036,  0.4498,
         0.2977, -0.0771, -0.0044, -0.0837,  1.3231,  0.6025,  0.5037,  0.4498,
         0.2655, -0.0590, -0.0278, -0.0219, -0.0215, -0.0101, -0.1662, -0.0277,
         0.8606,  0.3214, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022])
arg max of tensor([ 0.0274,  0.3983,  0.0442,  

Tokens embeddings:
tensor([ 0.0274,  0.3983,  0.0442,  1.1194,  0.2977,  0.3068,  0.6105, -0.0045,
         0.5936,  0.1539, -0.0163,  0.0247,  0.2662,  0.4085,  0.2977, -0.0405,
         0.2658,  0.0886, -0.0040,  0.0189,  0.4237,  0.0020, -0.0277,  0.0101,
         0.1435,  0.0372, -0.1131, -0.0277,  0.0101,  0.2178,  0.1036,  0.4498,
         0.2977, -0.0771, -0.0044, -0.0837,  1.3231,  0.6025,  0.5037,  0.4498,
         0.2655, -0.0590, -0.0278, -0.0219, -0.0215, -0.0101, -0.1662, -0.0277,
         0.8606,  0.3214, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022])
arg max of tensor([ 0.0274,  0.3983,  0.0442,  

Tokens embeddings:
tensor([ 0.0274,  0.3983,  0.0442,  1.1194,  0.2977,  0.3068,  0.6105, -0.0045,
         0.5936,  0.1539, -0.0163,  0.0247,  0.2662,  0.4085,  0.2977, -0.0405,
         0.2658,  0.0886, -0.0040,  0.0189,  0.4237,  0.0020, -0.0277,  0.0101,
         0.1435,  0.0372, -0.1131, -0.0277,  0.0101,  0.2178,  0.1036,  0.4498,
         0.2977, -0.0771, -0.0044, -0.0837,  1.3231,  0.6025,  0.5037,  0.4498,
         0.2655, -0.0590, -0.0278, -0.0219, -0.0215, -0.0101, -0.1662, -0.0277,
         0.8606,  0.3214, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022])
arg max of tensor([ 0.0274,  0.3983,  0.0442,  

Tokens embeddings:
tensor([ 0.0274,  0.3983,  0.0442,  1.1194,  0.2977,  0.3068,  0.6105, -0.0045,
         0.5936,  0.1539, -0.0163,  0.0247,  0.2662,  0.4085,  0.2977, -0.0405,
         0.2658,  0.0886, -0.0040,  0.0189,  0.4237,  0.0020, -0.0277,  0.0101,
         0.1435,  0.0372, -0.1131, -0.0277,  0.0101,  0.2178,  0.1036,  0.4498,
         0.2977, -0.0771, -0.0044, -0.0837,  1.3231,  0.6025,  0.5037,  0.4498,
         0.2655, -0.0590, -0.0278, -0.0219, -0.0215, -0.0101, -0.1662, -0.0277,
         0.8606,  0.3214, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022])
arg max of tensor([ 0.0274,  0.3983,  0.0442,  

Tokens embeddings:
tensor([ 0.0274,  0.3983,  0.0442,  1.1194,  0.2977,  0.3068,  0.6105, -0.0045,
         0.5936,  0.1539, -0.0163,  0.0247,  0.2662,  0.4085,  0.2977, -0.0405,
         0.2658,  0.0886, -0.0040,  0.0189,  0.4237,  0.0020, -0.0277,  0.0101,
         0.1435,  0.0372, -0.1131, -0.0277,  0.0101,  0.2178,  0.1036,  0.4498,
         0.2977, -0.0771, -0.0044, -0.0837,  1.3231,  0.6025,  0.5037,  0.4498,
         0.2655, -0.0590, -0.0278, -0.0219, -0.0215, -0.0101, -0.1662, -0.0277,
         0.8606,  0.3214, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022])
arg max of tensor([ 0.0274,  0.3983,  0.0442,  

Tokens embeddings:
tensor([ 0.0274,  0.3983,  0.0442,  1.1194,  0.2977,  0.3068,  0.6105, -0.0045,
         0.5936,  0.1539, -0.0163,  0.0247,  0.2662,  0.4085,  0.2977, -0.0405,
         0.2658,  0.0886, -0.0040,  0.0189,  0.4237,  0.0020, -0.0277,  0.0101,
         0.1435,  0.0372, -0.1131, -0.0277,  0.0101,  0.2178,  0.1036,  0.4498,
         0.2977, -0.0771, -0.0044, -0.0837,  1.3231,  0.6025,  0.5037,  0.4498,
         0.2655, -0.0590, -0.0278, -0.0219, -0.0215, -0.0101, -0.1662, -0.0277,
         0.8606,  0.3214, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022])
arg max of tensor([ 0.0274,  0.3983,  0.0442,  

Tokens embeddings:
tensor([ 0.0274,  0.3983,  0.0442,  1.1194,  0.2977,  0.3068,  0.6105, -0.0045,
         0.5936,  0.1539, -0.0163,  0.0247,  0.2662,  0.4085,  0.2977, -0.0405,
         0.2658,  0.0886, -0.0040,  0.0189,  0.4237,  0.0020, -0.0277,  0.0101,
         0.1435,  0.0372, -0.1131, -0.0277,  0.0101,  0.2178,  0.1036,  0.4498,
         0.2977, -0.0771, -0.0044, -0.0837,  1.3231,  0.6025,  0.5037,  0.4498,
         0.2655, -0.0590, -0.0278, -0.0219, -0.0215, -0.0101, -0.1662, -0.0277,
         0.8606,  0.3214, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022])
arg max of tensor([ 0.0274,  0.3983,  0.0442,  

Tokens embeddings:
tensor([ 0.0274, -0.0546, -0.0732,  0.4395, -0.0022,  0.2977,  0.0250,  0.0972,
         0.4498,  0.0614, -0.0092, -0.0206,  0.1567, -0.0022,  0.4498, -0.0102,
        -0.0140,  0.1079, -0.0277,  0.8606, -0.0198,  0.3214, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022])
arg max of tensor([ 0.0274, -0.0546, -0.0732,  

Tokens embeddings:
tensor([ 0.0274, -0.0546, -0.0732,  0.4395, -0.0022,  0.2977,  0.0250,  0.0972,
         0.4498,  0.0614, -0.0092, -0.0206,  0.1567, -0.0022,  0.4498, -0.0102,
        -0.0140,  0.1079, -0.0277,  0.8606, -0.0198,  0.3214, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022])
arg max of tensor([ 0.0274, -0.0546, -0.0732,  

Tokens embeddings:
tensor([ 0.0274, -0.0546, -0.0732,  0.4395, -0.0022,  0.2977,  0.0250,  0.0972,
         0.4498,  0.0614, -0.0092, -0.0206,  0.1567, -0.0022,  0.4498, -0.0102,
        -0.0140,  0.1079, -0.0277,  0.8606, -0.0198,  0.3214, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022])
arg max of tensor([ 0.0274, -0.0546, -0.0732,  

Tokens embeddings:
tensor([ 0.0274, -0.0546, -0.0732,  0.4395, -0.0022,  0.2977,  0.0250,  0.0972,
         0.4498,  0.0614, -0.0092, -0.0206,  0.1567, -0.0022,  0.4498, -0.0102,
        -0.0140,  0.1079, -0.0277,  0.8606, -0.0198,  0.3214, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022])
arg max of tensor([ 0.0274, -0.0546, -0.0732,  

Tokens embeddings:
tensor([ 0.0274, -0.0546, -0.0732,  0.4395, -0.0022,  0.2977,  0.0250,  0.0972,
         0.4498,  0.0614, -0.0092, -0.0206,  0.1567, -0.0022,  0.4498, -0.0102,
        -0.0140,  0.1079, -0.0277,  0.8606, -0.0198,  0.3214, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022])
arg max of tensor([ 0.0274, -0.0546, -0.0732,  

Tokens embeddings:
tensor([ 0.0274, -0.0546, -0.0732,  0.4395, -0.0022,  0.2977,  0.0250,  0.0972,
         0.4498,  0.0614, -0.0092, -0.0206,  0.1567, -0.0022,  0.4498, -0.0102,
        -0.0140,  0.1079, -0.0277,  0.8606, -0.0198,  0.3214, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022])
arg max of tensor([ 0.0274, -0.0546, -0.0732,  

Tokens embeddings:
tensor([ 0.0274, -0.0546, -0.0732,  0.4395, -0.0022,  0.2977,  0.0250,  0.0972,
         0.4498,  0.0614, -0.0092, -0.0206,  0.1567, -0.0022,  0.4498, -0.0102,
        -0.0140,  0.1079, -0.0277,  0.8606, -0.0198,  0.3214, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022])
arg max of tensor([ 0.0274, -0.0546, -0.0732,  

Tokens embeddings:
tensor([ 0.0274, -0.0546, -0.0732,  0.4395, -0.0022,  0.2977,  0.0250,  0.0972,
         0.4498,  0.0614, -0.0092, -0.0206,  0.1567, -0.0022,  0.4498, -0.0102,
        -0.0140,  0.1079, -0.0277,  0.8606, -0.0198,  0.3214, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022])
arg max of tensor([ 0.0274, -0.0546, -0.0732,  

Tokens embeddings:
tensor([ 0.0274, -0.0546, -0.0732,  0.4395, -0.0022,  0.2977,  0.0250,  0.0972,
         0.4498,  0.0614, -0.0092, -0.0206,  0.1567, -0.0022,  0.4498, -0.0102,
        -0.0140,  0.1079, -0.0277,  0.8606, -0.0198,  0.3214, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022])
arg max of tensor([ 0.0274, -0.0546, -0.0732,  

Tokens embeddings:
tensor([ 0.0274, -0.0546, -0.0732,  0.4395, -0.0022,  0.2977,  0.0250,  0.0972,
         0.4498,  0.0614, -0.0092, -0.0206,  0.1567, -0.0022,  0.4498, -0.0102,
        -0.0140,  0.1079, -0.0277,  0.8606, -0.0198,  0.3214, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022])
arg max of tensor([ 0.0274, -0.0546, -0.0732,  

Tokens embeddings:
tensor([ 0.0274, -0.0546, -0.0732,  0.4395, -0.0022,  0.2977,  0.0250,  0.0972,
         0.4498,  0.0614, -0.0092, -0.0206,  0.1567, -0.0022,  0.4498, -0.0102,
        -0.0140,  0.1079, -0.0277,  0.8606, -0.0198,  0.3214, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022])
arg max of tensor([ 0.0274, -0.0546, -0.0732,  

Tokens embeddings:
tensor([ 0.0274, -0.0546, -0.0732,  0.4395, -0.0022,  0.2977,  0.0250,  0.0972,
         0.4498,  0.0614, -0.0092, -0.0206,  0.1567, -0.0022,  0.4498, -0.0102,
        -0.0140,  0.1079, -0.0277,  0.8606, -0.0198,  0.3214, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022])
arg max of tensor([ 0.0274, -0.0546, -0.0732,  

Tokens embeddings:
tensor([ 0.0274,  0.0751,  0.0048, -0.0277,  0.0101,  0.1734, -0.3577, -0.0089,
         0.1953, -0.1083,  0.0250,  0.2055,  0.2388, -0.0022,  0.6025,  0.0866,
         0.0349, -0.0277,  0.0101,  0.0433, -0.0334,  0.1299,  0.0948, -0.0771,
        -0.0366, -0.0277,  0.0101,  0.2662,  0.1539, -0.0277,  0.0934, -0.1022,
        -0.0022,  0.0948,  0.2977, -0.0109,  0.8606, -0.0128, -0.0277,  0.8606,
         0.3214, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022])
arg max of tensor([ 0.0274,  0.0751,  0.0048, -

Tokens embeddings:
tensor([ 0.0274,  0.0751,  0.0048, -0.0277,  0.0101,  0.1734, -0.3577, -0.0089,
         0.1953, -0.1083,  0.0250,  0.2055,  0.2388, -0.0022,  0.6025,  0.0866,
         0.0349, -0.0277,  0.0101,  0.0433, -0.0334,  0.1299,  0.0948, -0.0771,
        -0.0366, -0.0277,  0.0101,  0.2662,  0.1539, -0.0277,  0.0934, -0.1022,
        -0.0022,  0.0948,  0.2977, -0.0109,  0.8606, -0.0128, -0.0277,  0.8606,
         0.3214, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022])
arg max of tensor([ 0.0274,  0.0751,  0.0048, -

Tokens embeddings:
tensor([ 0.0274,  0.0751,  0.0048, -0.0277,  0.0101,  0.1734, -0.3577, -0.0089,
         0.1953, -0.1083,  0.0250,  0.2055,  0.2388, -0.0022,  0.6025,  0.0866,
         0.0349, -0.0277,  0.0101,  0.0433, -0.0334,  0.1299,  0.0948, -0.0771,
        -0.0366, -0.0277,  0.0101,  0.2662,  0.1539, -0.0277,  0.0934, -0.1022,
        -0.0022,  0.0948,  0.2977, -0.0109,  0.8606, -0.0128, -0.0277,  0.8606,
         0.3214, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022])
arg max of tensor([ 0.0274,  0.0751,  0.0048, -

Tokens embeddings:
tensor([ 0.0274,  0.0751,  0.0048, -0.0277,  0.0101,  0.1734, -0.3577, -0.0089,
         0.1953, -0.1083,  0.0250,  0.2055,  0.2388, -0.0022,  0.6025,  0.0866,
         0.0349, -0.0277,  0.0101,  0.0433, -0.0334,  0.1299,  0.0948, -0.0771,
        -0.0366, -0.0277,  0.0101,  0.2662,  0.1539, -0.0277,  0.0934, -0.1022,
        -0.0022,  0.0948,  0.2977, -0.0109,  0.8606, -0.0128, -0.0277,  0.8606,
         0.3214, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022])
arg max of tensor([ 0.0274,  0.0751,  0.0048, -

Tokens embeddings:
tensor([ 0.0274,  0.0751,  0.0048, -0.0277,  0.0101,  0.1734, -0.3577, -0.0089,
         0.1953, -0.1083,  0.0250,  0.2055,  0.2388, -0.0022,  0.6025,  0.0866,
         0.0349, -0.0277,  0.0101,  0.0433, -0.0334,  0.1299,  0.0948, -0.0771,
        -0.0366, -0.0277,  0.0101,  0.2662,  0.1539, -0.0277,  0.0934, -0.1022,
        -0.0022,  0.0948,  0.2977, -0.0109,  0.8606, -0.0128, -0.0277,  0.8606,
         0.3214, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022])
arg max of tensor([ 0.0274,  0.0751,  0.0048, -

Tokens embeddings:
tensor([ 0.0274,  0.0751,  0.0048, -0.0277,  0.0101,  0.1734, -0.3577, -0.0089,
         0.1953, -0.1083,  0.0250,  0.2055,  0.2388, -0.0022,  0.6025,  0.0866,
         0.0349, -0.0277,  0.0101,  0.0433, -0.0334,  0.1299,  0.0948, -0.0771,
        -0.0366, -0.0277,  0.0101,  0.2662,  0.1539, -0.0277,  0.0934, -0.1022,
        -0.0022,  0.0948,  0.2977, -0.0109,  0.8606, -0.0128, -0.0277,  0.8606,
         0.3214, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022])
arg max of tensor([ 0.0274,  0.0751,  0.0048, -

Tokens embeddings:
tensor([ 0.0274,  0.0751,  0.0048, -0.0277,  0.0101,  0.1734, -0.3577, -0.0089,
         0.1953, -0.1083,  0.0250,  0.2055,  0.2388, -0.0022,  0.6025,  0.0866,
         0.0349, -0.0277,  0.0101,  0.0433, -0.0334,  0.1299,  0.0948, -0.0771,
        -0.0366, -0.0277,  0.0101,  0.2662,  0.1539, -0.0277,  0.0934, -0.1022,
        -0.0022,  0.0948,  0.2977, -0.0109,  0.8606, -0.0128, -0.0277,  0.8606,
         0.3214, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022])
arg max of tensor([ 0.0274,  0.0751,  0.0048, -

Tokens embeddings:
tensor([ 0.0274,  0.0751,  0.0048, -0.0277,  0.0101,  0.1734, -0.3577, -0.0089,
         0.1953, -0.1083,  0.0250,  0.2055,  0.2388, -0.0022,  0.6025,  0.0866,
         0.0349, -0.0277,  0.0101,  0.0433, -0.0334,  0.1299,  0.0948, -0.0771,
        -0.0366, -0.0277,  0.0101,  0.2662,  0.1539, -0.0277,  0.0934, -0.1022,
        -0.0022,  0.0948,  0.2977, -0.0109,  0.8606, -0.0128, -0.0277,  0.8606,
         0.3214, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022])
arg max of tensor([ 0.0274,  0.0751,  0.0048, -

Tokens embeddings:
tensor([ 0.0274,  0.0751,  0.0048, -0.0277,  0.0101,  0.1734, -0.3577, -0.0089,
         0.1953, -0.1083,  0.0250,  0.2055,  0.2388, -0.0022,  0.6025,  0.0866,
         0.0349, -0.0277,  0.0101,  0.0433, -0.0334,  0.1299,  0.0948, -0.0771,
        -0.0366, -0.0277,  0.0101,  0.2662,  0.1539, -0.0277,  0.0934, -0.1022,
        -0.0022,  0.0948,  0.2977, -0.0109,  0.8606, -0.0128, -0.0277,  0.8606,
         0.3214, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022])
arg max of tensor([ 0.0274,  0.0751,  0.0048, -

Tokens embeddings:
tensor([ 0.0274,  0.0751,  0.0048, -0.0277,  0.0101,  0.1734, -0.3577, -0.0089,
         0.1953, -0.1083,  0.0250,  0.2055,  0.2388, -0.0022,  0.6025,  0.0866,
         0.0349, -0.0277,  0.0101,  0.0433, -0.0334,  0.1299,  0.0948, -0.0771,
        -0.0366, -0.0277,  0.0101,  0.2662,  0.1539, -0.0277,  0.0934, -0.1022,
        -0.0022,  0.0948,  0.2977, -0.0109,  0.8606, -0.0128, -0.0277,  0.8606,
         0.3214, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022])
arg max of tensor([ 0.0274,  0.0751,  0.0048, -

Tokens embeddings:
tensor([ 0.0274,  0.0751,  0.0048, -0.0277,  0.0101,  0.1734, -0.3577, -0.0089,
         0.1953, -0.1083,  0.0250,  0.2055,  0.2388, -0.0022,  0.6025,  0.0866,
         0.0349, -0.0277,  0.0101,  0.0433, -0.0334,  0.1299,  0.0948, -0.0771,
        -0.0366, -0.0277,  0.0101,  0.2662,  0.1539, -0.0277,  0.0934, -0.1022,
        -0.0022,  0.0948,  0.2977, -0.0109,  0.8606, -0.0128, -0.0277,  0.8606,
         0.3214, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022])
arg max of tensor([ 0.0274,  0.0751,  0.0048, -

Tokens embeddings:
tensor([ 0.0274,  0.0751,  0.0048, -0.0277,  0.0101,  0.1734, -0.3577, -0.0089,
         0.1953, -0.1083,  0.0250,  0.2055,  0.2388, -0.0022,  0.6025,  0.0866,
         0.0349, -0.0277,  0.0101,  0.0433, -0.0334,  0.1299,  0.0948, -0.0771,
        -0.0366, -0.0277,  0.0101,  0.2662,  0.1539, -0.0277,  0.0934, -0.1022,
        -0.0022,  0.0948,  0.2977, -0.0109,  0.8606, -0.0128, -0.0277,  0.8606,
         0.3214, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022])
arg max of tensor([ 0.0274,  0.0751,  0.0048, -

Tokens embeddings:
tensor([ 2.7411e-02, -1.9409e-01, -1.0473e-02, -1.9156e-03, -1.6954e-02,
        -2.1465e-02, -2.7676e-02,  1.0056e-02,  9.7448e-02,  1.4232e-01,
         2.2607e-01,  4.4977e-01,  2.9773e-01, -5.7123e-02, -2.1518e-03,
         2.8717e-01,  1.3724e-01, -1.1636e-02, -2.0249e-02, -2.1518e-03,
         1.8931e-02, -4.5058e-03,  9.9946e-02,  1.5879e-01,  4.4268e-02,
        -2.1518e-03, -2.2074e-02, -2.1518e-03, -2.8337e-02, -2.0392e-02,
         1.2945e-01,  3.7736e-02, -9.8375e-03, -2.1518e-03,  1.1194e+00,
         6.5877e-02, -2.1527e-02, -2.0392e-02,  2.0452e-01, -2.1465e-02,
         2.9773e-01,  4.2112e-02,  4.4977e-01,  1.3724e-01, -1.1636e-02,
        -1.6013e-02, -2.1518e-03,  3.3030e-02,  8.0457e-01,  1.9686e-02,
         5.5098e-01,  2.3997e-01,  1.5847e-01, -1.3469e-02,  1.0172e-03,
         1.2945e-01, -1.6788e-02,  3.3441e-01, -2.7676e-02,  8.6060e-01,
         3.2138e-01, -2.1756e-03, -2.1756e-03, -2.1756e-03, -2.1756e-03,
        -2.1756e-03, -2.1756e-03

Tokens embeddings:
tensor([ 2.7411e-02, -1.9409e-01, -1.0473e-02, -1.9156e-03, -1.6954e-02,
        -2.1465e-02, -2.7676e-02,  1.0056e-02,  9.7448e-02,  1.4232e-01,
         2.2607e-01,  4.4977e-01,  2.9773e-01, -5.7123e-02, -2.1518e-03,
         2.8717e-01,  1.3724e-01, -1.1636e-02, -2.0249e-02, -2.1518e-03,
         1.8931e-02, -4.5058e-03,  9.9946e-02,  1.5879e-01,  4.4268e-02,
        -2.1518e-03, -2.2074e-02, -2.1518e-03, -2.8337e-02, -2.0392e-02,
         1.2945e-01,  3.7736e-02, -9.8375e-03, -2.1518e-03,  1.1194e+00,
         6.5877e-02, -2.1527e-02, -2.0392e-02,  2.0452e-01, -2.1465e-02,
         2.9773e-01,  4.2112e-02,  4.4977e-01,  1.3724e-01, -1.1636e-02,
        -1.6013e-02, -2.1518e-03,  3.3030e-02,  8.0457e-01,  1.9686e-02,
         5.5098e-01,  2.3997e-01,  1.5847e-01, -1.3469e-02,  1.0172e-03,
         1.2945e-01, -1.6788e-02,  3.3441e-01, -2.7676e-02,  8.6060e-01,
         3.2138e-01, -2.1756e-03, -2.1756e-03, -2.1756e-03, -2.1756e-03,
        -2.1756e-03, -2.1756e-03

Tokens embeddings:
tensor([ 2.7411e-02, -1.9409e-01, -1.0473e-02, -1.9156e-03, -1.6954e-02,
        -2.1465e-02, -2.7676e-02,  1.0056e-02,  9.7448e-02,  1.4232e-01,
         2.2607e-01,  4.4977e-01,  2.9773e-01, -5.7123e-02, -2.1518e-03,
         2.8717e-01,  1.3724e-01, -1.1636e-02, -2.0249e-02, -2.1518e-03,
         1.8931e-02, -4.5058e-03,  9.9946e-02,  1.5879e-01,  4.4268e-02,
        -2.1518e-03, -2.2074e-02, -2.1518e-03, -2.8337e-02, -2.0392e-02,
         1.2945e-01,  3.7736e-02, -9.8375e-03, -2.1518e-03,  1.1194e+00,
         6.5877e-02, -2.1527e-02, -2.0392e-02,  2.0452e-01, -2.1465e-02,
         2.9773e-01,  4.2112e-02,  4.4977e-01,  1.3724e-01, -1.1636e-02,
        -1.6013e-02, -2.1518e-03,  3.3030e-02,  8.0457e-01,  1.9686e-02,
         5.5098e-01,  2.3997e-01,  1.5847e-01, -1.3469e-02,  1.0172e-03,
         1.2945e-01, -1.6788e-02,  3.3441e-01, -2.7676e-02,  8.6060e-01,
         3.2138e-01, -2.1756e-03, -2.1756e-03, -2.1756e-03, -2.1756e-03,
        -2.1756e-03, -2.1756e-03

Tokens embeddings:
tensor([ 2.7411e-02, -1.9409e-01, -1.0473e-02, -1.9156e-03, -1.6954e-02,
        -2.1465e-02, -2.7676e-02,  1.0056e-02,  9.7448e-02,  1.4232e-01,
         2.2607e-01,  4.4977e-01,  2.9773e-01, -5.7123e-02, -2.1518e-03,
         2.8717e-01,  1.3724e-01, -1.1636e-02, -2.0249e-02, -2.1518e-03,
         1.8931e-02, -4.5058e-03,  9.9946e-02,  1.5879e-01,  4.4268e-02,
        -2.1518e-03, -2.2074e-02, -2.1518e-03, -2.8337e-02, -2.0392e-02,
         1.2945e-01,  3.7736e-02, -9.8375e-03, -2.1518e-03,  1.1194e+00,
         6.5877e-02, -2.1527e-02, -2.0392e-02,  2.0452e-01, -2.1465e-02,
         2.9773e-01,  4.2112e-02,  4.4977e-01,  1.3724e-01, -1.1636e-02,
        -1.6013e-02, -2.1518e-03,  3.3030e-02,  8.0457e-01,  1.9686e-02,
         5.5098e-01,  2.3997e-01,  1.5847e-01, -1.3469e-02,  1.0172e-03,
         1.2945e-01, -1.6788e-02,  3.3441e-01, -2.7676e-02,  8.6060e-01,
         3.2138e-01, -2.1756e-03, -2.1756e-03, -2.1756e-03, -2.1756e-03,
        -2.1756e-03, -2.1756e-03

Tokens embeddings:
tensor([ 2.7411e-02, -1.9409e-01, -1.0473e-02, -1.9156e-03, -1.6954e-02,
        -2.1465e-02, -2.7676e-02,  1.0056e-02,  9.7448e-02,  1.4232e-01,
         2.2607e-01,  4.4977e-01,  2.9773e-01, -5.7123e-02, -2.1518e-03,
         2.8717e-01,  1.3724e-01, -1.1636e-02, -2.0249e-02, -2.1518e-03,
         1.8931e-02, -4.5058e-03,  9.9946e-02,  1.5879e-01,  4.4268e-02,
        -2.1518e-03, -2.2074e-02, -2.1518e-03, -2.8337e-02, -2.0392e-02,
         1.2945e-01,  3.7736e-02, -9.8375e-03, -2.1518e-03,  1.1194e+00,
         6.5877e-02, -2.1527e-02, -2.0392e-02,  2.0452e-01, -2.1465e-02,
         2.9773e-01,  4.2112e-02,  4.4977e-01,  1.3724e-01, -1.1636e-02,
        -1.6013e-02, -2.1518e-03,  3.3030e-02,  8.0457e-01,  1.9686e-02,
         5.5098e-01,  2.3997e-01,  1.5847e-01, -1.3469e-02,  1.0172e-03,
         1.2945e-01, -1.6788e-02,  3.3441e-01, -2.7676e-02,  8.6060e-01,
         3.2138e-01, -2.1756e-03, -2.1756e-03, -2.1756e-03, -2.1756e-03,
        -2.1756e-03, -2.1756e-03

Tokens embeddings:
tensor([ 2.7411e-02, -1.9409e-01, -1.0473e-02, -1.9156e-03, -1.6954e-02,
        -2.1465e-02, -2.7676e-02,  1.0056e-02,  9.7448e-02,  1.4232e-01,
         2.2607e-01,  4.4977e-01,  2.9773e-01, -5.7123e-02, -2.1518e-03,
         2.8717e-01,  1.3724e-01, -1.1636e-02, -2.0249e-02, -2.1518e-03,
         1.8931e-02, -4.5058e-03,  9.9946e-02,  1.5879e-01,  4.4268e-02,
        -2.1518e-03, -2.2074e-02, -2.1518e-03, -2.8337e-02, -2.0392e-02,
         1.2945e-01,  3.7736e-02, -9.8375e-03, -2.1518e-03,  1.1194e+00,
         6.5877e-02, -2.1527e-02, -2.0392e-02,  2.0452e-01, -2.1465e-02,
         2.9773e-01,  4.2112e-02,  4.4977e-01,  1.3724e-01, -1.1636e-02,
        -1.6013e-02, -2.1518e-03,  3.3030e-02,  8.0457e-01,  1.9686e-02,
         5.5098e-01,  2.3997e-01,  1.5847e-01, -1.3469e-02,  1.0172e-03,
         1.2945e-01, -1.6788e-02,  3.3441e-01, -2.7676e-02,  8.6060e-01,
         3.2138e-01, -2.1756e-03, -2.1756e-03, -2.1756e-03, -2.1756e-03,
        -2.1756e-03, -2.1756e-03

Tokens embeddings:
tensor([ 2.7411e-02, -1.9409e-01, -1.0473e-02, -1.9156e-03, -1.6954e-02,
        -2.1465e-02, -2.7676e-02,  1.0056e-02,  9.7448e-02,  1.4232e-01,
         2.2607e-01,  4.4977e-01,  2.9773e-01, -5.7123e-02, -2.1518e-03,
         2.8717e-01,  1.3724e-01, -1.1636e-02, -2.0249e-02, -2.1518e-03,
         1.8931e-02, -4.5058e-03,  9.9946e-02,  1.5879e-01,  4.4268e-02,
        -2.1518e-03, -2.2074e-02, -2.1518e-03, -2.8337e-02, -2.0392e-02,
         1.2945e-01,  3.7736e-02, -9.8375e-03, -2.1518e-03,  1.1194e+00,
         6.5877e-02, -2.1527e-02, -2.0392e-02,  2.0452e-01, -2.1465e-02,
         2.9773e-01,  4.2112e-02,  4.4977e-01,  1.3724e-01, -1.1636e-02,
        -1.6013e-02, -2.1518e-03,  3.3030e-02,  8.0457e-01,  1.9686e-02,
         5.5098e-01,  2.3997e-01,  1.5847e-01, -1.3469e-02,  1.0172e-03,
         1.2945e-01, -1.6788e-02,  3.3441e-01, -2.7676e-02,  8.6060e-01,
         3.2138e-01, -2.1756e-03, -2.1756e-03, -2.1756e-03, -2.1756e-03,
        -2.1756e-03, -2.1756e-03

Tokens embeddings:
tensor([ 2.7411e-02, -1.9409e-01, -1.0473e-02, -1.9156e-03, -1.6954e-02,
        -2.1465e-02, -2.7676e-02,  1.0056e-02,  9.7448e-02,  1.4232e-01,
         2.2607e-01,  4.4977e-01,  2.9773e-01, -5.7123e-02, -2.1518e-03,
         2.8717e-01,  1.3724e-01, -1.1636e-02, -2.0249e-02, -2.1518e-03,
         1.8931e-02, -4.5058e-03,  9.9946e-02,  1.5879e-01,  4.4268e-02,
        -2.1518e-03, -2.2074e-02, -2.1518e-03, -2.8337e-02, -2.0392e-02,
         1.2945e-01,  3.7736e-02, -9.8375e-03, -2.1518e-03,  1.1194e+00,
         6.5877e-02, -2.1527e-02, -2.0392e-02,  2.0452e-01, -2.1465e-02,
         2.9773e-01,  4.2112e-02,  4.4977e-01,  1.3724e-01, -1.1636e-02,
        -1.6013e-02, -2.1518e-03,  3.3030e-02,  8.0457e-01,  1.9686e-02,
         5.5098e-01,  2.3997e-01,  1.5847e-01, -1.3469e-02,  1.0172e-03,
         1.2945e-01, -1.6788e-02,  3.3441e-01, -2.7676e-02,  8.6060e-01,
         3.2138e-01, -2.1756e-03, -2.1756e-03, -2.1756e-03, -2.1756e-03,
        -2.1756e-03, -2.1756e-03

Tokens embeddings:
tensor([ 2.7411e-02, -1.9409e-01, -1.0473e-02, -1.9156e-03, -1.6954e-02,
        -2.1465e-02, -2.7676e-02,  1.0056e-02,  9.7448e-02,  1.4232e-01,
         2.2607e-01,  4.4977e-01,  2.9773e-01, -5.7123e-02, -2.1518e-03,
         2.8717e-01,  1.3724e-01, -1.1636e-02, -2.0249e-02, -2.1518e-03,
         1.8931e-02, -4.5058e-03,  9.9946e-02,  1.5879e-01,  4.4268e-02,
        -2.1518e-03, -2.2074e-02, -2.1518e-03, -2.8337e-02, -2.0392e-02,
         1.2945e-01,  3.7736e-02, -9.8375e-03, -2.1518e-03,  1.1194e+00,
         6.5877e-02, -2.1527e-02, -2.0392e-02,  2.0452e-01, -2.1465e-02,
         2.9773e-01,  4.2112e-02,  4.4977e-01,  1.3724e-01, -1.1636e-02,
        -1.6013e-02, -2.1518e-03,  3.3030e-02,  8.0457e-01,  1.9686e-02,
         5.5098e-01,  2.3997e-01,  1.5847e-01, -1.3469e-02,  1.0172e-03,
         1.2945e-01, -1.6788e-02,  3.3441e-01, -2.7676e-02,  8.6060e-01,
         3.2138e-01, -2.1756e-03, -2.1756e-03, -2.1756e-03, -2.1756e-03,
        -2.1756e-03, -2.1756e-03

Tokens embeddings:
tensor([ 2.7411e-02, -1.9409e-01, -1.0473e-02, -1.9156e-03, -1.6954e-02,
        -2.1465e-02, -2.7676e-02,  1.0056e-02,  9.7448e-02,  1.4232e-01,
         2.2607e-01,  4.4977e-01,  2.9773e-01, -5.7123e-02, -2.1518e-03,
         2.8717e-01,  1.3724e-01, -1.1636e-02, -2.0249e-02, -2.1518e-03,
         1.8931e-02, -4.5058e-03,  9.9946e-02,  1.5879e-01,  4.4268e-02,
        -2.1518e-03, -2.2074e-02, -2.1518e-03, -2.8337e-02, -2.0392e-02,
         1.2945e-01,  3.7736e-02, -9.8375e-03, -2.1518e-03,  1.1194e+00,
         6.5877e-02, -2.1527e-02, -2.0392e-02,  2.0452e-01, -2.1465e-02,
         2.9773e-01,  4.2112e-02,  4.4977e-01,  1.3724e-01, -1.1636e-02,
        -1.6013e-02, -2.1518e-03,  3.3030e-02,  8.0457e-01,  1.9686e-02,
         5.5098e-01,  2.3997e-01,  1.5847e-01, -1.3469e-02,  1.0172e-03,
         1.2945e-01, -1.6788e-02,  3.3441e-01, -2.7676e-02,  8.6060e-01,
         3.2138e-01, -2.1756e-03, -2.1756e-03, -2.1756e-03, -2.1756e-03,
        -2.1756e-03, -2.1756e-03

Tokens embeddings:
tensor([ 2.7411e-02, -1.9409e-01, -1.0473e-02, -1.9156e-03, -1.6954e-02,
        -2.1465e-02, -2.7676e-02,  1.0056e-02,  9.7448e-02,  1.4232e-01,
         2.2607e-01,  4.4977e-01,  2.9773e-01, -5.7123e-02, -2.1518e-03,
         2.8717e-01,  1.3724e-01, -1.1636e-02, -2.0249e-02, -2.1518e-03,
         1.8931e-02, -4.5058e-03,  9.9946e-02,  1.5879e-01,  4.4268e-02,
        -2.1518e-03, -2.2074e-02, -2.1518e-03, -2.8337e-02, -2.0392e-02,
         1.2945e-01,  3.7736e-02, -9.8375e-03, -2.1518e-03,  1.1194e+00,
         6.5877e-02, -2.1527e-02, -2.0392e-02,  2.0452e-01, -2.1465e-02,
         2.9773e-01,  4.2112e-02,  4.4977e-01,  1.3724e-01, -1.1636e-02,
        -1.6013e-02, -2.1518e-03,  3.3030e-02,  8.0457e-01,  1.9686e-02,
         5.5098e-01,  2.3997e-01,  1.5847e-01, -1.3469e-02,  1.0172e-03,
         1.2945e-01, -1.6788e-02,  3.3441e-01, -2.7676e-02,  8.6060e-01,
         3.2138e-01, -2.1756e-03, -2.1756e-03, -2.1756e-03, -2.1756e-03,
        -2.1756e-03, -2.1756e-03

Tokens embeddings:
tensor([ 2.7411e-02, -1.9409e-01, -1.0473e-02, -1.9156e-03, -1.6954e-02,
        -2.1465e-02, -2.7676e-02,  1.0056e-02,  9.7448e-02,  1.4232e-01,
         2.2607e-01,  4.4977e-01,  2.9773e-01, -5.7123e-02, -2.1518e-03,
         2.8717e-01,  1.3724e-01, -1.1636e-02, -2.0249e-02, -2.1518e-03,
         1.8931e-02, -4.5058e-03,  9.9946e-02,  1.5879e-01,  4.4268e-02,
        -2.1518e-03, -2.2074e-02, -2.1518e-03, -2.8337e-02, -2.0392e-02,
         1.2945e-01,  3.7736e-02, -9.8375e-03, -2.1518e-03,  1.1194e+00,
         6.5877e-02, -2.1527e-02, -2.0392e-02,  2.0452e-01, -2.1465e-02,
         2.9773e-01,  4.2112e-02,  4.4977e-01,  1.3724e-01, -1.1636e-02,
        -1.6013e-02, -2.1518e-03,  3.3030e-02,  8.0457e-01,  1.9686e-02,
         5.5098e-01,  2.3997e-01,  1.5847e-01, -1.3469e-02,  1.0172e-03,
         1.2945e-01, -1.6788e-02,  3.3441e-01, -2.7676e-02,  8.6060e-01,
         3.2138e-01, -2.1756e-03, -2.1756e-03, -2.1756e-03, -2.1756e-03,
        -2.1756e-03, -2.1756e-03

Tokens embeddings:
tensor([ 0.0274, -0.0223, -0.0102, -0.0077,  0.1591, -0.0202, -0.0022,  0.0494,
        -0.0019,  0.0327,  0.1226, -0.0192, -0.0207,  0.0775, -0.0277,  0.0101,
        -0.1391, -0.1062, -0.0022, -0.0190,  0.8606,  0.1953,  0.2050, -0.0204,
         0.0312, -0.1391, -0.0201,  0.1226,  0.2231, -0.0162, -0.3577,  0.0026,
         0.0026, -0.0022, -0.0190, -0.0277,  0.8606,  0.3214, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022])
arg max of tensor([ 0.0274, -0.0223, -0.0102, -

Tokens embeddings:
tensor([ 0.0274, -0.0223, -0.0102, -0.0077,  0.1591, -0.0202, -0.0022,  0.0494,
        -0.0019,  0.0327,  0.1226, -0.0192, -0.0207,  0.0775, -0.0277,  0.0101,
        -0.1391, -0.1062, -0.0022, -0.0190,  0.8606,  0.1953,  0.2050, -0.0204,
         0.0312, -0.1391, -0.0201,  0.1226,  0.2231, -0.0162, -0.3577,  0.0026,
         0.0026, -0.0022, -0.0190, -0.0277,  0.8606,  0.3214, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022])
arg max of tensor([ 0.0274, -0.0223, -0.0102, -

Tokens embeddings:
tensor([ 0.0274, -0.0223, -0.0102, -0.0077,  0.1591, -0.0202, -0.0022,  0.0494,
        -0.0019,  0.0327,  0.1226, -0.0192, -0.0207,  0.0775, -0.0277,  0.0101,
        -0.1391, -0.1062, -0.0022, -0.0190,  0.8606,  0.1953,  0.2050, -0.0204,
         0.0312, -0.1391, -0.0201,  0.1226,  0.2231, -0.0162, -0.3577,  0.0026,
         0.0026, -0.0022, -0.0190, -0.0277,  0.8606,  0.3214, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022])
arg max of tensor([ 0.0274, -0.0223, -0.0102, -

Tokens embeddings:
tensor([ 0.0274, -0.0223, -0.0102, -0.0077,  0.1591, -0.0202, -0.0022,  0.0494,
        -0.0019,  0.0327,  0.1226, -0.0192, -0.0207,  0.0775, -0.0277,  0.0101,
        -0.1391, -0.1062, -0.0022, -0.0190,  0.8606,  0.1953,  0.2050, -0.0204,
         0.0312, -0.1391, -0.0201,  0.1226,  0.2231, -0.0162, -0.3577,  0.0026,
         0.0026, -0.0022, -0.0190, -0.0277,  0.8606,  0.3214, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022])
arg max of tensor([ 0.0274, -0.0223, -0.0102, -

Tokens embeddings:
tensor([ 0.0274, -0.0223, -0.0102, -0.0077,  0.1591, -0.0202, -0.0022,  0.0494,
        -0.0019,  0.0327,  0.1226, -0.0192, -0.0207,  0.0775, -0.0277,  0.0101,
        -0.1391, -0.1062, -0.0022, -0.0190,  0.8606,  0.1953,  0.2050, -0.0204,
         0.0312, -0.1391, -0.0201,  0.1226,  0.2231, -0.0162, -0.3577,  0.0026,
         0.0026, -0.0022, -0.0190, -0.0277,  0.8606,  0.3214, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022])
arg max of tensor([ 0.0274, -0.0223, -0.0102, -

Tokens embeddings:
tensor([ 0.0274, -0.0223, -0.0102, -0.0077,  0.1591, -0.0202, -0.0022,  0.0494,
        -0.0019,  0.0327,  0.1226, -0.0192, -0.0207,  0.0775, -0.0277,  0.0101,
        -0.1391, -0.1062, -0.0022, -0.0190,  0.8606,  0.1953,  0.2050, -0.0204,
         0.0312, -0.1391, -0.0201,  0.1226,  0.2231, -0.0162, -0.3577,  0.0026,
         0.0026, -0.0022, -0.0190, -0.0277,  0.8606,  0.3214, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022])
arg max of tensor([ 0.0274, -0.0223, -0.0102, -

Tokens embeddings:
tensor([ 0.0274, -0.0223, -0.0102, -0.0077,  0.1591, -0.0202, -0.0022,  0.0494,
        -0.0019,  0.0327,  0.1226, -0.0192, -0.0207,  0.0775, -0.0277,  0.0101,
        -0.1391, -0.1062, -0.0022, -0.0190,  0.8606,  0.1953,  0.2050, -0.0204,
         0.0312, -0.1391, -0.0201,  0.1226,  0.2231, -0.0162, -0.3577,  0.0026,
         0.0026, -0.0022, -0.0190, -0.0277,  0.8606,  0.3214, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022])
arg max of tensor([ 0.0274, -0.0223, -0.0102, -

Tokens embeddings:
tensor([ 0.0274, -0.0223, -0.0102, -0.0077,  0.1591, -0.0202, -0.0022,  0.0494,
        -0.0019,  0.0327,  0.1226, -0.0192, -0.0207,  0.0775, -0.0277,  0.0101,
        -0.1391, -0.1062, -0.0022, -0.0190,  0.8606,  0.1953,  0.2050, -0.0204,
         0.0312, -0.1391, -0.0201,  0.1226,  0.2231, -0.0162, -0.3577,  0.0026,
         0.0026, -0.0022, -0.0190, -0.0277,  0.8606,  0.3214, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022])
arg max of tensor([ 0.0274, -0.0223, -0.0102, -

Tokens embeddings:
tensor([ 0.0274, -0.0223, -0.0102, -0.0077,  0.1591, -0.0202, -0.0022,  0.0494,
        -0.0019,  0.0327,  0.1226, -0.0192, -0.0207,  0.0775, -0.0277,  0.0101,
        -0.1391, -0.1062, -0.0022, -0.0190,  0.8606,  0.1953,  0.2050, -0.0204,
         0.0312, -0.1391, -0.0201,  0.1226,  0.2231, -0.0162, -0.3577,  0.0026,
         0.0026, -0.0022, -0.0190, -0.0277,  0.8606,  0.3214, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022])
arg max of tensor([ 0.0274, -0.0223, -0.0102, -

Tokens embeddings:
tensor([ 0.0274, -0.0223, -0.0102, -0.0077,  0.1591, -0.0202, -0.0022,  0.0494,
        -0.0019,  0.0327,  0.1226, -0.0192, -0.0207,  0.0775, -0.0277,  0.0101,
        -0.1391, -0.1062, -0.0022, -0.0190,  0.8606,  0.1953,  0.2050, -0.0204,
         0.0312, -0.1391, -0.0201,  0.1226,  0.2231, -0.0162, -0.3577,  0.0026,
         0.0026, -0.0022, -0.0190, -0.0277,  0.8606,  0.3214, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022])
arg max of tensor([ 0.0274, -0.0223, -0.0102, -

Tokens embeddings:
tensor([ 0.0274, -0.0223, -0.0102, -0.0077,  0.1591, -0.0202, -0.0022,  0.0494,
        -0.0019,  0.0327,  0.1226, -0.0192, -0.0207,  0.0775, -0.0277,  0.0101,
        -0.1391, -0.1062, -0.0022, -0.0190,  0.8606,  0.1953,  0.2050, -0.0204,
         0.0312, -0.1391, -0.0201,  0.1226,  0.2231, -0.0162, -0.3577,  0.0026,
         0.0026, -0.0022, -0.0190, -0.0277,  0.8606,  0.3214, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022])
arg max of tensor([ 0.0274, -0.0223, -0.0102, -

Tokens embeddings:
tensor([ 0.0274, -0.0223, -0.0102, -0.0077,  0.1591, -0.0202, -0.0022,  0.0494,
        -0.0019,  0.0327,  0.1226, -0.0192, -0.0207,  0.0775, -0.0277,  0.0101,
        -0.1391, -0.1062, -0.0022, -0.0190,  0.8606,  0.1953,  0.2050, -0.0204,
         0.0312, -0.1391, -0.0201,  0.1226,  0.2231, -0.0162, -0.3577,  0.0026,
         0.0026, -0.0022, -0.0190, -0.0277,  0.8606,  0.3214, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022])
arg max of tensor([ 0.0274, -0.0223, -0.0102, -

Tokens embeddings:
tensor([ 0.0274,  0.0172,  0.8606, -0.0186,  0.2133, -0.0311,  0.1562, -0.0222,
         0.1742, -0.0767, -0.0149,  0.0948,  0.0633,  0.5354, -0.0139, -0.0172,
         0.0431,  0.1562,  0.0684, -0.0158,  0.4237,  0.0144, -0.0531,  0.6025,
         0.0684,  0.1742,  0.2872,  0.0220,  1.1194,  0.0312,  0.0090,  0.1079,
        -0.0277,  0.8606,  0.3214, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022])
arg max of tensor([ 0.0274,  0.0172,  0.8606, -

Tokens embeddings:
tensor([ 0.0274,  0.0172,  0.8606, -0.0186,  0.2133, -0.0311,  0.1562, -0.0222,
         0.1742, -0.0767, -0.0149,  0.0948,  0.0633,  0.5354, -0.0139, -0.0172,
         0.0431,  0.1562,  0.0684, -0.0158,  0.4237,  0.0144, -0.0531,  0.6025,
         0.0684,  0.1742,  0.2872,  0.0220,  1.1194,  0.0312,  0.0090,  0.1079,
        -0.0277,  0.8606,  0.3214, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022])
arg max of tensor([ 0.0274,  0.0172,  0.8606, -

Tokens embeddings:
tensor([ 0.0274,  0.0172,  0.8606, -0.0186,  0.2133, -0.0311,  0.1562, -0.0222,
         0.1742, -0.0767, -0.0149,  0.0948,  0.0633,  0.5354, -0.0139, -0.0172,
         0.0431,  0.1562,  0.0684, -0.0158,  0.4237,  0.0144, -0.0531,  0.6025,
         0.0684,  0.1742,  0.2872,  0.0220,  1.1194,  0.0312,  0.0090,  0.1079,
        -0.0277,  0.8606,  0.3214, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022])
arg max of tensor([ 0.0274,  0.0172,  0.8606, -

Tokens embeddings:
tensor([ 0.0274,  0.0172,  0.8606, -0.0186,  0.2133, -0.0311,  0.1562, -0.0222,
         0.1742, -0.0767, -0.0149,  0.0948,  0.0633,  0.5354, -0.0139, -0.0172,
         0.0431,  0.1562,  0.0684, -0.0158,  0.4237,  0.0144, -0.0531,  0.6025,
         0.0684,  0.1742,  0.2872,  0.0220,  1.1194,  0.0312,  0.0090,  0.1079,
        -0.0277,  0.8606,  0.3214, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022])
arg max of tensor([ 0.0274,  0.0172,  0.8606, -

Tokens embeddings:
tensor([ 0.0274,  0.0172,  0.8606, -0.0186,  0.2133, -0.0311,  0.1562, -0.0222,
         0.1742, -0.0767, -0.0149,  0.0948,  0.0633,  0.5354, -0.0139, -0.0172,
         0.0431,  0.1562,  0.0684, -0.0158,  0.4237,  0.0144, -0.0531,  0.6025,
         0.0684,  0.1742,  0.2872,  0.0220,  1.1194,  0.0312,  0.0090,  0.1079,
        -0.0277,  0.8606,  0.3214, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022])
arg max of tensor([ 0.0274,  0.0172,  0.8606, -

Tokens embeddings:
tensor([ 0.0274,  0.0172,  0.8606, -0.0186,  0.2133, -0.0311,  0.1562, -0.0222,
         0.1742, -0.0767, -0.0149,  0.0948,  0.0633,  0.5354, -0.0139, -0.0172,
         0.0431,  0.1562,  0.0684, -0.0158,  0.4237,  0.0144, -0.0531,  0.6025,
         0.0684,  0.1742,  0.2872,  0.0220,  1.1194,  0.0312,  0.0090,  0.1079,
        -0.0277,  0.8606,  0.3214, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022])
arg max of tensor([ 0.0274,  0.0172,  0.8606, -

Tokens embeddings:
tensor([ 0.0274,  0.0172,  0.8606, -0.0186,  0.2133, -0.0311,  0.1562, -0.0222,
         0.1742, -0.0767, -0.0149,  0.0948,  0.0633,  0.5354, -0.0139, -0.0172,
         0.0431,  0.1562,  0.0684, -0.0158,  0.4237,  0.0144, -0.0531,  0.6025,
         0.0684,  0.1742,  0.2872,  0.0220,  1.1194,  0.0312,  0.0090,  0.1079,
        -0.0277,  0.8606,  0.3214, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022])
arg max of tensor([ 0.0274,  0.0172,  0.8606, -

Tokens embeddings:
tensor([ 0.0274,  0.0172,  0.8606, -0.0186,  0.2133, -0.0311,  0.1562, -0.0222,
         0.1742, -0.0767, -0.0149,  0.0948,  0.0633,  0.5354, -0.0139, -0.0172,
         0.0431,  0.1562,  0.0684, -0.0158,  0.4237,  0.0144, -0.0531,  0.6025,
         0.0684,  0.1742,  0.2872,  0.0220,  1.1194,  0.0312,  0.0090,  0.1079,
        -0.0277,  0.8606,  0.3214, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022])
arg max of tensor([ 0.0274,  0.0172,  0.8606, -

Tokens embeddings:
tensor([ 0.0274,  0.0172,  0.8606, -0.0186,  0.2133, -0.0311,  0.1562, -0.0222,
         0.1742, -0.0767, -0.0149,  0.0948,  0.0633,  0.5354, -0.0139, -0.0172,
         0.0431,  0.1562,  0.0684, -0.0158,  0.4237,  0.0144, -0.0531,  0.6025,
         0.0684,  0.1742,  0.2872,  0.0220,  1.1194,  0.0312,  0.0090,  0.1079,
        -0.0277,  0.8606,  0.3214, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022])
arg max of tensor([ 0.0274,  0.0172,  0.8606, -

Tokens embeddings:
tensor([ 0.0274,  0.0172,  0.8606, -0.0186,  0.2133, -0.0311,  0.1562, -0.0222,
         0.1742, -0.0767, -0.0149,  0.0948,  0.0633,  0.5354, -0.0139, -0.0172,
         0.0431,  0.1562,  0.0684, -0.0158,  0.4237,  0.0144, -0.0531,  0.6025,
         0.0684,  0.1742,  0.2872,  0.0220,  1.1194,  0.0312,  0.0090,  0.1079,
        -0.0277,  0.8606,  0.3214, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022])
arg max of tensor([ 0.0274,  0.0172,  0.8606, -

Tokens embeddings:
tensor([ 0.0274,  0.0172,  0.8606, -0.0186,  0.2133, -0.0311,  0.1562, -0.0222,
         0.1742, -0.0767, -0.0149,  0.0948,  0.0633,  0.5354, -0.0139, -0.0172,
         0.0431,  0.1562,  0.0684, -0.0158,  0.4237,  0.0144, -0.0531,  0.6025,
         0.0684,  0.1742,  0.2872,  0.0220,  1.1194,  0.0312,  0.0090,  0.1079,
        -0.0277,  0.8606,  0.3214, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022])
arg max of tensor([ 0.0274,  0.0172,  0.8606, -

Tokens embeddings:
tensor([ 0.0274,  0.0172,  0.8606, -0.0186,  0.2133, -0.0311,  0.1562, -0.0222,
         0.1742, -0.0767, -0.0149,  0.0948,  0.0633,  0.5354, -0.0139, -0.0172,
         0.0431,  0.1562,  0.0684, -0.0158,  0.4237,  0.0144, -0.0531,  0.6025,
         0.0684,  0.1742,  0.2872,  0.0220,  1.1194,  0.0312,  0.0090,  0.1079,
        -0.0277,  0.8606,  0.3214, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022])
arg max of tensor([ 0.0274,  0.0172,  0.8606, -

Tokens embeddings:
tensor([ 0.0274, -0.0732,  0.1435,  1.1194,  0.2977, -0.0485,  0.3100,  0.1833,
         0.4684,  0.2977,  0.1761, -0.0077,  0.1912,  0.2439,  0.2977,  0.0429,
         0.4498,  0.2147, -0.0077, -0.0017,  0.6332,  1.1194, -0.0546,  0.3343,
         0.2055,  0.2011, -0.0022,  0.4133, -0.0212, -0.0202, -0.0215,  0.1015,
         0.0948,  1.4764,  0.2572,  0.9712, -0.0025, -0.0277,  0.8606, -0.0198,
         0.3214, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022])
arg max of tensor([ 0.0274, -0.0732,  0.1435,  

Tokens embeddings:
tensor([ 0.0274, -0.0732,  0.1435,  1.1194,  0.2977, -0.0485,  0.3100,  0.1833,
         0.4684,  0.2977,  0.1761, -0.0077,  0.1912,  0.2439,  0.2977,  0.0429,
         0.4498,  0.2147, -0.0077, -0.0017,  0.6332,  1.1194, -0.0546,  0.3343,
         0.2055,  0.2011, -0.0022,  0.4133, -0.0212, -0.0202, -0.0215,  0.1015,
         0.0948,  1.4764,  0.2572,  0.9712, -0.0025, -0.0277,  0.8606, -0.0198,
         0.3214, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022])
arg max of tensor([ 0.0274, -0.0732,  0.1435,  

Tokens embeddings:
tensor([ 0.0274, -0.0732,  0.1435,  1.1194,  0.2977, -0.0485,  0.3100,  0.1833,
         0.4684,  0.2977,  0.1761, -0.0077,  0.1912,  0.2439,  0.2977,  0.0429,
         0.4498,  0.2147, -0.0077, -0.0017,  0.6332,  1.1194, -0.0546,  0.3343,
         0.2055,  0.2011, -0.0022,  0.4133, -0.0212, -0.0202, -0.0215,  0.1015,
         0.0948,  1.4764,  0.2572,  0.9712, -0.0025, -0.0277,  0.8606, -0.0198,
         0.3214, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022])
arg max of tensor([ 0.0274, -0.0732,  0.1435,  

Tokens embeddings:
tensor([ 0.0274, -0.0732,  0.1435,  1.1194,  0.2977, -0.0485,  0.3100,  0.1833,
         0.4684,  0.2977,  0.1761, -0.0077,  0.1912,  0.2439,  0.2977,  0.0429,
         0.4498,  0.2147, -0.0077, -0.0017,  0.6332,  1.1194, -0.0546,  0.3343,
         0.2055,  0.2011, -0.0022,  0.4133, -0.0212, -0.0202, -0.0215,  0.1015,
         0.0948,  1.4764,  0.2572,  0.9712, -0.0025, -0.0277,  0.8606, -0.0198,
         0.3214, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022])
arg max of tensor([ 0.0274, -0.0732,  0.1435,  

Tokens embeddings:
tensor([ 0.0274, -0.0732,  0.1435,  1.1194,  0.2977, -0.0485,  0.3100,  0.1833,
         0.4684,  0.2977,  0.1761, -0.0077,  0.1912,  0.2439,  0.2977,  0.0429,
         0.4498,  0.2147, -0.0077, -0.0017,  0.6332,  1.1194, -0.0546,  0.3343,
         0.2055,  0.2011, -0.0022,  0.4133, -0.0212, -0.0202, -0.0215,  0.1015,
         0.0948,  1.4764,  0.2572,  0.9712, -0.0025, -0.0277,  0.8606, -0.0198,
         0.3214, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022])
arg max of tensor([ 0.0274, -0.0732,  0.1435,  

Tokens embeddings:
tensor([ 0.0274, -0.0732,  0.1435,  1.1194,  0.2977, -0.0485,  0.3100,  0.1833,
         0.4684,  0.2977,  0.1761, -0.0077,  0.1912,  0.2439,  0.2977,  0.0429,
         0.4498,  0.2147, -0.0077, -0.0017,  0.6332,  1.1194, -0.0546,  0.3343,
         0.2055,  0.2011, -0.0022,  0.4133, -0.0212, -0.0202, -0.0215,  0.1015,
         0.0948,  1.4764,  0.2572,  0.9712, -0.0025, -0.0277,  0.8606, -0.0198,
         0.3214, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022])
arg max of tensor([ 0.0274, -0.0732,  0.1435,  

Tokens embeddings:
tensor([ 0.0274, -0.0732,  0.1435,  1.1194,  0.2977, -0.0485,  0.3100,  0.1833,
         0.4684,  0.2977,  0.1761, -0.0077,  0.1912,  0.2439,  0.2977,  0.0429,
         0.4498,  0.2147, -0.0077, -0.0017,  0.6332,  1.1194, -0.0546,  0.3343,
         0.2055,  0.2011, -0.0022,  0.4133, -0.0212, -0.0202, -0.0215,  0.1015,
         0.0948,  1.4764,  0.2572,  0.9712, -0.0025, -0.0277,  0.8606, -0.0198,
         0.3214, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022])
arg max of tensor([ 0.0274, -0.0732,  0.1435,  

Tokens embeddings:
tensor([ 0.0274, -0.0732,  0.1435,  1.1194,  0.2977, -0.0485,  0.3100,  0.1833,
         0.4684,  0.2977,  0.1761, -0.0077,  0.1912,  0.2439,  0.2977,  0.0429,
         0.4498,  0.2147, -0.0077, -0.0017,  0.6332,  1.1194, -0.0546,  0.3343,
         0.2055,  0.2011, -0.0022,  0.4133, -0.0212, -0.0202, -0.0215,  0.1015,
         0.0948,  1.4764,  0.2572,  0.9712, -0.0025, -0.0277,  0.8606, -0.0198,
         0.3214, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022])
arg max of tensor([ 0.0274, -0.0732,  0.1435,  

Tokens embeddings:
tensor([ 0.0274, -0.0732,  0.1435,  1.1194,  0.2977, -0.0485,  0.3100,  0.1833,
         0.4684,  0.2977,  0.1761, -0.0077,  0.1912,  0.2439,  0.2977,  0.0429,
         0.4498,  0.2147, -0.0077, -0.0017,  0.6332,  1.1194, -0.0546,  0.3343,
         0.2055,  0.2011, -0.0022,  0.4133, -0.0212, -0.0202, -0.0215,  0.1015,
         0.0948,  1.4764,  0.2572,  0.9712, -0.0025, -0.0277,  0.8606, -0.0198,
         0.3214, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022])
arg max of tensor([ 0.0274, -0.0732,  0.1435,  

Tokens embeddings:
tensor([ 0.0274, -0.0732,  0.1435,  1.1194,  0.2977, -0.0485,  0.3100,  0.1833,
         0.4684,  0.2977,  0.1761, -0.0077,  0.1912,  0.2439,  0.2977,  0.0429,
         0.4498,  0.2147, -0.0077, -0.0017,  0.6332,  1.1194, -0.0546,  0.3343,
         0.2055,  0.2011, -0.0022,  0.4133, -0.0212, -0.0202, -0.0215,  0.1015,
         0.0948,  1.4764,  0.2572,  0.9712, -0.0025, -0.0277,  0.8606, -0.0198,
         0.3214, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022])
arg max of tensor([ 0.0274, -0.0732,  0.1435,  

Tokens embeddings:
tensor([ 0.0274, -0.0732,  0.1435,  1.1194,  0.2977, -0.0485,  0.3100,  0.1833,
         0.4684,  0.2977,  0.1761, -0.0077,  0.1912,  0.2439,  0.2977,  0.0429,
         0.4498,  0.2147, -0.0077, -0.0017,  0.6332,  1.1194, -0.0546,  0.3343,
         0.2055,  0.2011, -0.0022,  0.4133, -0.0212, -0.0202, -0.0215,  0.1015,
         0.0948,  1.4764,  0.2572,  0.9712, -0.0025, -0.0277,  0.8606, -0.0198,
         0.3214, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022])
arg max of tensor([ 0.0274, -0.0732,  0.1435,  

Tokens embeddings:
tensor([ 0.0274, -0.0732,  0.1435,  1.1194,  0.2977, -0.0485,  0.3100,  0.1833,
         0.4684,  0.2977,  0.1761, -0.0077,  0.1912,  0.2439,  0.2977,  0.0429,
         0.4498,  0.2147, -0.0077, -0.0017,  0.6332,  1.1194, -0.0546,  0.3343,
         0.2055,  0.2011, -0.0022,  0.4133, -0.0212, -0.0202, -0.0215,  0.1015,
         0.0948,  1.4764,  0.2572,  0.9712, -0.0025, -0.0277,  0.8606, -0.0198,
         0.3214, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022])
arg max of tensor([ 0.0274, -0.0732,  0.1435,  

Tokens embeddings:
tensor([ 0.0274, -0.0069, -0.0215,  0.1776,  0.5664,  0.5234,  0.1472,  0.0931,
         0.5168,  0.1201,  0.2579, -0.0317, -0.0019, -0.0235, -0.0277,  0.0101,
         0.0252,  0.4704, -0.0407,  0.0999,  0.3104,  0.0897, -0.0022,  1.1194,
         0.3100, -0.0109,  0.0165,  0.2662,  0.1708,  0.0316,  0.6025,  0.2662,
        -0.1396, -0.0277,  0.8606,  0.3214, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022])
arg max of tensor([ 0.0274, -0.0069, -0.0215,  

Tokens embeddings:
tensor([ 0.0274, -0.0069, -0.0215,  0.1776,  0.5664,  0.5234,  0.1472,  0.0931,
         0.5168,  0.1201,  0.2579, -0.0317, -0.0019, -0.0235, -0.0277,  0.0101,
         0.0252,  0.4704, -0.0407,  0.0999,  0.3104,  0.0897, -0.0022,  1.1194,
         0.3100, -0.0109,  0.0165,  0.2662,  0.1708,  0.0316,  0.6025,  0.2662,
        -0.1396, -0.0277,  0.8606,  0.3214, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022])
arg max of tensor([ 0.0274, -0.0069, -0.0215,  

Tokens embeddings:
tensor([ 0.0274, -0.0069, -0.0215,  0.1776,  0.5664,  0.5234,  0.1472,  0.0931,
         0.5168,  0.1201,  0.2579, -0.0317, -0.0019, -0.0235, -0.0277,  0.0101,
         0.0252,  0.4704, -0.0407,  0.0999,  0.3104,  0.0897, -0.0022,  1.1194,
         0.3100, -0.0109,  0.0165,  0.2662,  0.1708,  0.0316,  0.6025,  0.2662,
        -0.1396, -0.0277,  0.8606,  0.3214, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022])
arg max of tensor([ 0.0274, -0.0069, -0.0215,  

Tokens embeddings:
tensor([ 0.0274, -0.0069, -0.0215,  0.1776,  0.5664,  0.5234,  0.1472,  0.0931,
         0.5168,  0.1201,  0.2579, -0.0317, -0.0019, -0.0235, -0.0277,  0.0101,
         0.0252,  0.4704, -0.0407,  0.0999,  0.3104,  0.0897, -0.0022,  1.1194,
         0.3100, -0.0109,  0.0165,  0.2662,  0.1708,  0.0316,  0.6025,  0.2662,
        -0.1396, -0.0277,  0.8606,  0.3214, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022])
arg max of tensor([ 0.0274, -0.0069, -0.0215,  

Tokens embeddings:
tensor([ 0.0274, -0.0069, -0.0215,  0.1776,  0.5664,  0.5234,  0.1472,  0.0931,
         0.5168,  0.1201,  0.2579, -0.0317, -0.0019, -0.0235, -0.0277,  0.0101,
         0.0252,  0.4704, -0.0407,  0.0999,  0.3104,  0.0897, -0.0022,  1.1194,
         0.3100, -0.0109,  0.0165,  0.2662,  0.1708,  0.0316,  0.6025,  0.2662,
        -0.1396, -0.0277,  0.8606,  0.3214, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022])
arg max of tensor([ 0.0274, -0.0069, -0.0215,  

Tokens embeddings:
tensor([ 0.0274, -0.0069, -0.0215,  0.1776,  0.5664,  0.5234,  0.1472,  0.0931,
         0.5168,  0.1201,  0.2579, -0.0317, -0.0019, -0.0235, -0.0277,  0.0101,
         0.0252,  0.4704, -0.0407,  0.0999,  0.3104,  0.0897, -0.0022,  1.1194,
         0.3100, -0.0109,  0.0165,  0.2662,  0.1708,  0.0316,  0.6025,  0.2662,
        -0.1396, -0.0277,  0.8606,  0.3214, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022])
arg max of tensor([ 0.0274, -0.0069, -0.0215,  

Tokens embeddings:
tensor([ 0.0274, -0.0069, -0.0215,  0.1776,  0.5664,  0.5234,  0.1472,  0.0931,
         0.5168,  0.1201,  0.2579, -0.0317, -0.0019, -0.0235, -0.0277,  0.0101,
         0.0252,  0.4704, -0.0407,  0.0999,  0.3104,  0.0897, -0.0022,  1.1194,
         0.3100, -0.0109,  0.0165,  0.2662,  0.1708,  0.0316,  0.6025,  0.2662,
        -0.1396, -0.0277,  0.8606,  0.3214, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022])
arg max of tensor([ 0.0274, -0.0069, -0.0215,  

Tokens embeddings:
tensor([ 0.0274, -0.0069, -0.0215,  0.1776,  0.5664,  0.5234,  0.1472,  0.0931,
         0.5168,  0.1201,  0.2579, -0.0317, -0.0019, -0.0235, -0.0277,  0.0101,
         0.0252,  0.4704, -0.0407,  0.0999,  0.3104,  0.0897, -0.0022,  1.1194,
         0.3100, -0.0109,  0.0165,  0.2662,  0.1708,  0.0316,  0.6025,  0.2662,
        -0.1396, -0.0277,  0.8606,  0.3214, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022])
arg max of tensor([ 0.0274, -0.0069, -0.0215,  

Tokens embeddings:
tensor([ 0.0274, -0.0069, -0.0215,  0.1776,  0.5664,  0.5234,  0.1472,  0.0931,
         0.5168,  0.1201,  0.2579, -0.0317, -0.0019, -0.0235, -0.0277,  0.0101,
         0.0252,  0.4704, -0.0407,  0.0999,  0.3104,  0.0897, -0.0022,  1.1194,
         0.3100, -0.0109,  0.0165,  0.2662,  0.1708,  0.0316,  0.6025,  0.2662,
        -0.1396, -0.0277,  0.8606,  0.3214, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022])
arg max of tensor([ 0.0274, -0.0069, -0.0215,  

Tokens embeddings:
tensor([ 0.0274, -0.0069, -0.0215,  0.1776,  0.5664,  0.5234,  0.1472,  0.0931,
         0.5168,  0.1201,  0.2579, -0.0317, -0.0019, -0.0235, -0.0277,  0.0101,
         0.0252,  0.4704, -0.0407,  0.0999,  0.3104,  0.0897, -0.0022,  1.1194,
         0.3100, -0.0109,  0.0165,  0.2662,  0.1708,  0.0316,  0.6025,  0.2662,
        -0.1396, -0.0277,  0.8606,  0.3214, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022])
arg max of tensor([ 0.0274, -0.0069, -0.0215,  

Tokens embeddings:
tensor([ 0.0274, -0.0069, -0.0215,  0.1776,  0.5664,  0.5234,  0.1472,  0.0931,
         0.5168,  0.1201,  0.2579, -0.0317, -0.0019, -0.0235, -0.0277,  0.0101,
         0.0252,  0.4704, -0.0407,  0.0999,  0.3104,  0.0897, -0.0022,  1.1194,
         0.3100, -0.0109,  0.0165,  0.2662,  0.1708,  0.0316,  0.6025,  0.2662,
        -0.1396, -0.0277,  0.8606,  0.3214, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022])
arg max of tensor([ 0.0274, -0.0069, -0.0215,  

Tokens embeddings:
tensor([ 0.0274, -0.0069, -0.0215,  0.1776,  0.5664,  0.5234,  0.1472,  0.0931,
         0.5168,  0.1201,  0.2579, -0.0317, -0.0019, -0.0235, -0.0277,  0.0101,
         0.0252,  0.4704, -0.0407,  0.0999,  0.3104,  0.0897, -0.0022,  1.1194,
         0.3100, -0.0109,  0.0165,  0.2662,  0.1708,  0.0316,  0.6025,  0.2662,
        -0.1396, -0.0277,  0.8606,  0.3214, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022])
arg max of tensor([ 0.0274, -0.0069, -0.0215,  

In [31]:
# Pooling First token [CLS] for each sentence - argmax

# Select some sentences randomly
sent_index = [11, 12, 13, 14, 15, 16, 17, 18, 19, 20]

for s in sent_index:
  print("*" * 100)
  # Get the sentence's words
  tokens = test_inputs[s]
  # For each layer...
  for l in range(12):
    print("\nLayer", l+1)
    attention = np.squeeze(test_attentions[s][l].detach().cpu().numpy(), axis=0)
    # and for each head
    #for h, head in enumerate(attention):
    #print("Head", h+1)
    # Get the attention for the cls token
    encoded_tokens = bert_tokenizer(tokens, padding=True, truncation=True, max_length=128, return_tensors='pt')
    encoded_tokens = encoded_tokens.to(device)
    with torch.no_grad():
      model_output1 = model_e(**encoded_tokens)
      tokens_embeddings = model_output1[0][:,0]
      tokens_embeddings = tokens_embeddings.cpu()
      cls_attentions = head[0]
      display(HTML(colorize(tokens, cls_attentions)))
      print("Tokens embeddings:")
      print(tokens_embeddings)
      arg = argmax(tokens_embeddings)
      print('arg max of %s: %d' % (tokens_embeddings, arg))

****************************************************************************************************

Layer 1


Tokens embeddings:
tensor([ 0.0274,  0.4179, -0.1941, -0.0310, -0.0022,  0.8623, -0.0025,  0.2662,
        -0.0137,  0.2400,  0.6456,  0.1184,  0.1553,  0.0588, -0.0636,  0.0041,
        -0.0771,  0.1036,  0.6025,  0.0091, -0.0336, -0.0181,  0.1898,  0.0948,
         0.2470,  0.1504,  0.0079,  0.0165,  1.1194, -0.0277, -0.0031, -0.0677,
         0.8606,  0.6025,  0.1539, -0.0416, -0.0075, -0.0287, -0.0677,  0.8606,
         0.0555,  0.1226, -0.0060, -0.0025,  0.2662,  0.1465,  0.2977,  0.0128,
         0.4498, -0.0484, -0.0769,  0.2234,  0.2662,  0.0034, -0.0022, -0.0277,
         0.8606,  0.3214, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022])
arg max of tensor([ 0.0274,  0.4179, -0.1941, -

Tokens embeddings:
tensor([ 0.0274,  0.4179, -0.1941, -0.0310, -0.0022,  0.8623, -0.0025,  0.2662,
        -0.0137,  0.2400,  0.6456,  0.1184,  0.1553,  0.0588, -0.0636,  0.0041,
        -0.0771,  0.1036,  0.6025,  0.0091, -0.0336, -0.0181,  0.1898,  0.0948,
         0.2470,  0.1504,  0.0079,  0.0165,  1.1194, -0.0277, -0.0031, -0.0677,
         0.8606,  0.6025,  0.1539, -0.0416, -0.0075, -0.0287, -0.0677,  0.8606,
         0.0555,  0.1226, -0.0060, -0.0025,  0.2662,  0.1465,  0.2977,  0.0128,
         0.4498, -0.0484, -0.0769,  0.2234,  0.2662,  0.0034, -0.0022, -0.0277,
         0.8606,  0.3214, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022])
arg max of tensor([ 0.0274,  0.4179, -0.1941, -

Tokens embeddings:
tensor([ 0.0274,  0.4179, -0.1941, -0.0310, -0.0022,  0.8623, -0.0025,  0.2662,
        -0.0137,  0.2400,  0.6456,  0.1184,  0.1553,  0.0588, -0.0636,  0.0041,
        -0.0771,  0.1036,  0.6025,  0.0091, -0.0336, -0.0181,  0.1898,  0.0948,
         0.2470,  0.1504,  0.0079,  0.0165,  1.1194, -0.0277, -0.0031, -0.0677,
         0.8606,  0.6025,  0.1539, -0.0416, -0.0075, -0.0287, -0.0677,  0.8606,
         0.0555,  0.1226, -0.0060, -0.0025,  0.2662,  0.1465,  0.2977,  0.0128,
         0.4498, -0.0484, -0.0769,  0.2234,  0.2662,  0.0034, -0.0022, -0.0277,
         0.8606,  0.3214, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022])
arg max of tensor([ 0.0274,  0.4179, -0.1941, -

Tokens embeddings:
tensor([ 0.0274,  0.4179, -0.1941, -0.0310, -0.0022,  0.8623, -0.0025,  0.2662,
        -0.0137,  0.2400,  0.6456,  0.1184,  0.1553,  0.0588, -0.0636,  0.0041,
        -0.0771,  0.1036,  0.6025,  0.0091, -0.0336, -0.0181,  0.1898,  0.0948,
         0.2470,  0.1504,  0.0079,  0.0165,  1.1194, -0.0277, -0.0031, -0.0677,
         0.8606,  0.6025,  0.1539, -0.0416, -0.0075, -0.0287, -0.0677,  0.8606,
         0.0555,  0.1226, -0.0060, -0.0025,  0.2662,  0.1465,  0.2977,  0.0128,
         0.4498, -0.0484, -0.0769,  0.2234,  0.2662,  0.0034, -0.0022, -0.0277,
         0.8606,  0.3214, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022])
arg max of tensor([ 0.0274,  0.4179, -0.1941, -

Tokens embeddings:
tensor([ 0.0274,  0.4179, -0.1941, -0.0310, -0.0022,  0.8623, -0.0025,  0.2662,
        -0.0137,  0.2400,  0.6456,  0.1184,  0.1553,  0.0588, -0.0636,  0.0041,
        -0.0771,  0.1036,  0.6025,  0.0091, -0.0336, -0.0181,  0.1898,  0.0948,
         0.2470,  0.1504,  0.0079,  0.0165,  1.1194, -0.0277, -0.0031, -0.0677,
         0.8606,  0.6025,  0.1539, -0.0416, -0.0075, -0.0287, -0.0677,  0.8606,
         0.0555,  0.1226, -0.0060, -0.0025,  0.2662,  0.1465,  0.2977,  0.0128,
         0.4498, -0.0484, -0.0769,  0.2234,  0.2662,  0.0034, -0.0022, -0.0277,
         0.8606,  0.3214, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022])
arg max of tensor([ 0.0274,  0.4179, -0.1941, -

Tokens embeddings:
tensor([ 0.0274,  0.4179, -0.1941, -0.0310, -0.0022,  0.8623, -0.0025,  0.2662,
        -0.0137,  0.2400,  0.6456,  0.1184,  0.1553,  0.0588, -0.0636,  0.0041,
        -0.0771,  0.1036,  0.6025,  0.0091, -0.0336, -0.0181,  0.1898,  0.0948,
         0.2470,  0.1504,  0.0079,  0.0165,  1.1194, -0.0277, -0.0031, -0.0677,
         0.8606,  0.6025,  0.1539, -0.0416, -0.0075, -0.0287, -0.0677,  0.8606,
         0.0555,  0.1226, -0.0060, -0.0025,  0.2662,  0.1465,  0.2977,  0.0128,
         0.4498, -0.0484, -0.0769,  0.2234,  0.2662,  0.0034, -0.0022, -0.0277,
         0.8606,  0.3214, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022])
arg max of tensor([ 0.0274,  0.4179, -0.1941, -

Tokens embeddings:
tensor([ 0.0274,  0.4179, -0.1941, -0.0310, -0.0022,  0.8623, -0.0025,  0.2662,
        -0.0137,  0.2400,  0.6456,  0.1184,  0.1553,  0.0588, -0.0636,  0.0041,
        -0.0771,  0.1036,  0.6025,  0.0091, -0.0336, -0.0181,  0.1898,  0.0948,
         0.2470,  0.1504,  0.0079,  0.0165,  1.1194, -0.0277, -0.0031, -0.0677,
         0.8606,  0.6025,  0.1539, -0.0416, -0.0075, -0.0287, -0.0677,  0.8606,
         0.0555,  0.1226, -0.0060, -0.0025,  0.2662,  0.1465,  0.2977,  0.0128,
         0.4498, -0.0484, -0.0769,  0.2234,  0.2662,  0.0034, -0.0022, -0.0277,
         0.8606,  0.3214, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022])
arg max of tensor([ 0.0274,  0.4179, -0.1941, -

Tokens embeddings:
tensor([ 0.0274,  0.4179, -0.1941, -0.0310, -0.0022,  0.8623, -0.0025,  0.2662,
        -0.0137,  0.2400,  0.6456,  0.1184,  0.1553,  0.0588, -0.0636,  0.0041,
        -0.0771,  0.1036,  0.6025,  0.0091, -0.0336, -0.0181,  0.1898,  0.0948,
         0.2470,  0.1504,  0.0079,  0.0165,  1.1194, -0.0277, -0.0031, -0.0677,
         0.8606,  0.6025,  0.1539, -0.0416, -0.0075, -0.0287, -0.0677,  0.8606,
         0.0555,  0.1226, -0.0060, -0.0025,  0.2662,  0.1465,  0.2977,  0.0128,
         0.4498, -0.0484, -0.0769,  0.2234,  0.2662,  0.0034, -0.0022, -0.0277,
         0.8606,  0.3214, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022])
arg max of tensor([ 0.0274,  0.4179, -0.1941, -

Tokens embeddings:
tensor([ 0.0274,  0.4179, -0.1941, -0.0310, -0.0022,  0.8623, -0.0025,  0.2662,
        -0.0137,  0.2400,  0.6456,  0.1184,  0.1553,  0.0588, -0.0636,  0.0041,
        -0.0771,  0.1036,  0.6025,  0.0091, -0.0336, -0.0181,  0.1898,  0.0948,
         0.2470,  0.1504,  0.0079,  0.0165,  1.1194, -0.0277, -0.0031, -0.0677,
         0.8606,  0.6025,  0.1539, -0.0416, -0.0075, -0.0287, -0.0677,  0.8606,
         0.0555,  0.1226, -0.0060, -0.0025,  0.2662,  0.1465,  0.2977,  0.0128,
         0.4498, -0.0484, -0.0769,  0.2234,  0.2662,  0.0034, -0.0022, -0.0277,
         0.8606,  0.3214, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022])
arg max of tensor([ 0.0274,  0.4179, -0.1941, -

Tokens embeddings:
tensor([ 0.0274,  0.4179, -0.1941, -0.0310, -0.0022,  0.8623, -0.0025,  0.2662,
        -0.0137,  0.2400,  0.6456,  0.1184,  0.1553,  0.0588, -0.0636,  0.0041,
        -0.0771,  0.1036,  0.6025,  0.0091, -0.0336, -0.0181,  0.1898,  0.0948,
         0.2470,  0.1504,  0.0079,  0.0165,  1.1194, -0.0277, -0.0031, -0.0677,
         0.8606,  0.6025,  0.1539, -0.0416, -0.0075, -0.0287, -0.0677,  0.8606,
         0.0555,  0.1226, -0.0060, -0.0025,  0.2662,  0.1465,  0.2977,  0.0128,
         0.4498, -0.0484, -0.0769,  0.2234,  0.2662,  0.0034, -0.0022, -0.0277,
         0.8606,  0.3214, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022])
arg max of tensor([ 0.0274,  0.4179, -0.1941, -

Tokens embeddings:
tensor([ 0.0274,  0.4179, -0.1941, -0.0310, -0.0022,  0.8623, -0.0025,  0.2662,
        -0.0137,  0.2400,  0.6456,  0.1184,  0.1553,  0.0588, -0.0636,  0.0041,
        -0.0771,  0.1036,  0.6025,  0.0091, -0.0336, -0.0181,  0.1898,  0.0948,
         0.2470,  0.1504,  0.0079,  0.0165,  1.1194, -0.0277, -0.0031, -0.0677,
         0.8606,  0.6025,  0.1539, -0.0416, -0.0075, -0.0287, -0.0677,  0.8606,
         0.0555,  0.1226, -0.0060, -0.0025,  0.2662,  0.1465,  0.2977,  0.0128,
         0.4498, -0.0484, -0.0769,  0.2234,  0.2662,  0.0034, -0.0022, -0.0277,
         0.8606,  0.3214, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022])
arg max of tensor([ 0.0274,  0.4179, -0.1941, -

Tokens embeddings:
tensor([ 0.0274,  0.4179, -0.1941, -0.0310, -0.0022,  0.8623, -0.0025,  0.2662,
        -0.0137,  0.2400,  0.6456,  0.1184,  0.1553,  0.0588, -0.0636,  0.0041,
        -0.0771,  0.1036,  0.6025,  0.0091, -0.0336, -0.0181,  0.1898,  0.0948,
         0.2470,  0.1504,  0.0079,  0.0165,  1.1194, -0.0277, -0.0031, -0.0677,
         0.8606,  0.6025,  0.1539, -0.0416, -0.0075, -0.0287, -0.0677,  0.8606,
         0.0555,  0.1226, -0.0060, -0.0025,  0.2662,  0.1465,  0.2977,  0.0128,
         0.4498, -0.0484, -0.0769,  0.2234,  0.2662,  0.0034, -0.0022, -0.0277,
         0.8606,  0.3214, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022])
arg max of tensor([ 0.0274,  0.4179, -0.1941, -

Tokens embeddings:
tensor([ 0.0274,  0.2599,  0.4498,  0.2977, -0.0123, -0.0208,  0.1038,  0.0372,
        -0.0022,  0.0948,  0.2977, -0.0925,  0.0076,  0.0846,  0.2977,  0.1405,
        -0.0277,  0.0101,  0.0431, -0.0142,  0.0486, -0.0022,  0.0440,  0.5007,
        -0.0202, -0.0022,  0.0315,  0.2662,  0.1465,  0.2470,  0.0052, -0.0151,
         0.0948,  0.2977,  0.0437,  0.0603, -0.0277,  0.8606,  0.3214, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022])
arg max of tensor([ 0.0274,  0.2599,  0.4498,  

Tokens embeddings:
tensor([ 0.0274,  0.2599,  0.4498,  0.2977, -0.0123, -0.0208,  0.1038,  0.0372,
        -0.0022,  0.0948,  0.2977, -0.0925,  0.0076,  0.0846,  0.2977,  0.1405,
        -0.0277,  0.0101,  0.0431, -0.0142,  0.0486, -0.0022,  0.0440,  0.5007,
        -0.0202, -0.0022,  0.0315,  0.2662,  0.1465,  0.2470,  0.0052, -0.0151,
         0.0948,  0.2977,  0.0437,  0.0603, -0.0277,  0.8606,  0.3214, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022])
arg max of tensor([ 0.0274,  0.2599,  0.4498,  

Tokens embeddings:
tensor([ 0.0274,  0.2599,  0.4498,  0.2977, -0.0123, -0.0208,  0.1038,  0.0372,
        -0.0022,  0.0948,  0.2977, -0.0925,  0.0076,  0.0846,  0.2977,  0.1405,
        -0.0277,  0.0101,  0.0431, -0.0142,  0.0486, -0.0022,  0.0440,  0.5007,
        -0.0202, -0.0022,  0.0315,  0.2662,  0.1465,  0.2470,  0.0052, -0.0151,
         0.0948,  0.2977,  0.0437,  0.0603, -0.0277,  0.8606,  0.3214, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022])
arg max of tensor([ 0.0274,  0.2599,  0.4498,  

Tokens embeddings:
tensor([ 0.0274,  0.2599,  0.4498,  0.2977, -0.0123, -0.0208,  0.1038,  0.0372,
        -0.0022,  0.0948,  0.2977, -0.0925,  0.0076,  0.0846,  0.2977,  0.1405,
        -0.0277,  0.0101,  0.0431, -0.0142,  0.0486, -0.0022,  0.0440,  0.5007,
        -0.0202, -0.0022,  0.0315,  0.2662,  0.1465,  0.2470,  0.0052, -0.0151,
         0.0948,  0.2977,  0.0437,  0.0603, -0.0277,  0.8606,  0.3214, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022])
arg max of tensor([ 0.0274,  0.2599,  0.4498,  

Tokens embeddings:
tensor([ 0.0274,  0.2599,  0.4498,  0.2977, -0.0123, -0.0208,  0.1038,  0.0372,
        -0.0022,  0.0948,  0.2977, -0.0925,  0.0076,  0.0846,  0.2977,  0.1405,
        -0.0277,  0.0101,  0.0431, -0.0142,  0.0486, -0.0022,  0.0440,  0.5007,
        -0.0202, -0.0022,  0.0315,  0.2662,  0.1465,  0.2470,  0.0052, -0.0151,
         0.0948,  0.2977,  0.0437,  0.0603, -0.0277,  0.8606,  0.3214, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022])
arg max of tensor([ 0.0274,  0.2599,  0.4498,  

Tokens embeddings:
tensor([ 0.0274,  0.2599,  0.4498,  0.2977, -0.0123, -0.0208,  0.1038,  0.0372,
        -0.0022,  0.0948,  0.2977, -0.0925,  0.0076,  0.0846,  0.2977,  0.1405,
        -0.0277,  0.0101,  0.0431, -0.0142,  0.0486, -0.0022,  0.0440,  0.5007,
        -0.0202, -0.0022,  0.0315,  0.2662,  0.1465,  0.2470,  0.0052, -0.0151,
         0.0948,  0.2977,  0.0437,  0.0603, -0.0277,  0.8606,  0.3214, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022])
arg max of tensor([ 0.0274,  0.2599,  0.4498,  

Tokens embeddings:
tensor([ 0.0274,  0.2599,  0.4498,  0.2977, -0.0123, -0.0208,  0.1038,  0.0372,
        -0.0022,  0.0948,  0.2977, -0.0925,  0.0076,  0.0846,  0.2977,  0.1405,
        -0.0277,  0.0101,  0.0431, -0.0142,  0.0486, -0.0022,  0.0440,  0.5007,
        -0.0202, -0.0022,  0.0315,  0.2662,  0.1465,  0.2470,  0.0052, -0.0151,
         0.0948,  0.2977,  0.0437,  0.0603, -0.0277,  0.8606,  0.3214, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022])
arg max of tensor([ 0.0274,  0.2599,  0.4498,  

Tokens embeddings:
tensor([ 0.0274,  0.2599,  0.4498,  0.2977, -0.0123, -0.0208,  0.1038,  0.0372,
        -0.0022,  0.0948,  0.2977, -0.0925,  0.0076,  0.0846,  0.2977,  0.1405,
        -0.0277,  0.0101,  0.0431, -0.0142,  0.0486, -0.0022,  0.0440,  0.5007,
        -0.0202, -0.0022,  0.0315,  0.2662,  0.1465,  0.2470,  0.0052, -0.0151,
         0.0948,  0.2977,  0.0437,  0.0603, -0.0277,  0.8606,  0.3214, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022])
arg max of tensor([ 0.0274,  0.2599,  0.4498,  

Tokens embeddings:
tensor([ 0.0274,  0.2599,  0.4498,  0.2977, -0.0123, -0.0208,  0.1038,  0.0372,
        -0.0022,  0.0948,  0.2977, -0.0925,  0.0076,  0.0846,  0.2977,  0.1405,
        -0.0277,  0.0101,  0.0431, -0.0142,  0.0486, -0.0022,  0.0440,  0.5007,
        -0.0202, -0.0022,  0.0315,  0.2662,  0.1465,  0.2470,  0.0052, -0.0151,
         0.0948,  0.2977,  0.0437,  0.0603, -0.0277,  0.8606,  0.3214, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022])
arg max of tensor([ 0.0274,  0.2599,  0.4498,  

Tokens embeddings:
tensor([ 0.0274,  0.2599,  0.4498,  0.2977, -0.0123, -0.0208,  0.1038,  0.0372,
        -0.0022,  0.0948,  0.2977, -0.0925,  0.0076,  0.0846,  0.2977,  0.1405,
        -0.0277,  0.0101,  0.0431, -0.0142,  0.0486, -0.0022,  0.0440,  0.5007,
        -0.0202, -0.0022,  0.0315,  0.2662,  0.1465,  0.2470,  0.0052, -0.0151,
         0.0948,  0.2977,  0.0437,  0.0603, -0.0277,  0.8606,  0.3214, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022])
arg max of tensor([ 0.0274,  0.2599,  0.4498,  

Tokens embeddings:
tensor([ 0.0274,  0.2599,  0.4498,  0.2977, -0.0123, -0.0208,  0.1038,  0.0372,
        -0.0022,  0.0948,  0.2977, -0.0925,  0.0076,  0.0846,  0.2977,  0.1405,
        -0.0277,  0.0101,  0.0431, -0.0142,  0.0486, -0.0022,  0.0440,  0.5007,
        -0.0202, -0.0022,  0.0315,  0.2662,  0.1465,  0.2470,  0.0052, -0.0151,
         0.0948,  0.2977,  0.0437,  0.0603, -0.0277,  0.8606,  0.3214, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022])
arg max of tensor([ 0.0274,  0.2599,  0.4498,  

Tokens embeddings:
tensor([ 0.0274,  0.2599,  0.4498,  0.2977, -0.0123, -0.0208,  0.1038,  0.0372,
        -0.0022,  0.0948,  0.2977, -0.0925,  0.0076,  0.0846,  0.2977,  0.1405,
        -0.0277,  0.0101,  0.0431, -0.0142,  0.0486, -0.0022,  0.0440,  0.5007,
        -0.0202, -0.0022,  0.0315,  0.2662,  0.1465,  0.2470,  0.0052, -0.0151,
         0.0948,  0.2977,  0.0437,  0.0603, -0.0277,  0.8606,  0.3214, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022])
arg max of tensor([ 0.0274,  0.2599,  0.4498,  

Tokens embeddings:
tensor([ 0.0274,  0.0177, -0.0144,  0.8606, -0.0119, -0.0250, -0.0022, -0.0277,
         0.0101,  0.2470, -0.0021,  0.5664, -0.0202, -0.0022,  0.0398,  0.0948,
         0.2977, -0.5207,  0.0886, -0.0019,  0.0985,  0.1781,  0.0146, -0.0277,
         0.0101, -0.0311,  0.0172,  0.8606, -0.0131,  0.0082, -0.0194, -0.0215,
         0.2327, -0.0022, -0.0092,  0.4498,  0.2470, -0.0629,  0.0948,  0.2478,
         0.0413,  0.0431,  0.1562,  0.0202,  0.2662,  0.0407,  0.2470,  0.1366,
         0.4498,  0.0188, -0.0022,  0.2720,  0.2470,  0.3849, -0.0019,  0.0948,
         0.0520, -0.0277,  0.8606,  0.3214, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022])
arg max of tensor([ 0.0274,  0.0177, -0.0144,  

Tokens embeddings:
tensor([ 0.0274,  0.0177, -0.0144,  0.8606, -0.0119, -0.0250, -0.0022, -0.0277,
         0.0101,  0.2470, -0.0021,  0.5664, -0.0202, -0.0022,  0.0398,  0.0948,
         0.2977, -0.5207,  0.0886, -0.0019,  0.0985,  0.1781,  0.0146, -0.0277,
         0.0101, -0.0311,  0.0172,  0.8606, -0.0131,  0.0082, -0.0194, -0.0215,
         0.2327, -0.0022, -0.0092,  0.4498,  0.2470, -0.0629,  0.0948,  0.2478,
         0.0413,  0.0431,  0.1562,  0.0202,  0.2662,  0.0407,  0.2470,  0.1366,
         0.4498,  0.0188, -0.0022,  0.2720,  0.2470,  0.3849, -0.0019,  0.0948,
         0.0520, -0.0277,  0.8606,  0.3214, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022])
arg max of tensor([ 0.0274,  0.0177, -0.0144,  

Tokens embeddings:
tensor([ 0.0274,  0.0177, -0.0144,  0.8606, -0.0119, -0.0250, -0.0022, -0.0277,
         0.0101,  0.2470, -0.0021,  0.5664, -0.0202, -0.0022,  0.0398,  0.0948,
         0.2977, -0.5207,  0.0886, -0.0019,  0.0985,  0.1781,  0.0146, -0.0277,
         0.0101, -0.0311,  0.0172,  0.8606, -0.0131,  0.0082, -0.0194, -0.0215,
         0.2327, -0.0022, -0.0092,  0.4498,  0.2470, -0.0629,  0.0948,  0.2478,
         0.0413,  0.0431,  0.1562,  0.0202,  0.2662,  0.0407,  0.2470,  0.1366,
         0.4498,  0.0188, -0.0022,  0.2720,  0.2470,  0.3849, -0.0019,  0.0948,
         0.0520, -0.0277,  0.8606,  0.3214, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022])
arg max of tensor([ 0.0274,  0.0177, -0.0144,  

Tokens embeddings:
tensor([ 0.0274,  0.0177, -0.0144,  0.8606, -0.0119, -0.0250, -0.0022, -0.0277,
         0.0101,  0.2470, -0.0021,  0.5664, -0.0202, -0.0022,  0.0398,  0.0948,
         0.2977, -0.5207,  0.0886, -0.0019,  0.0985,  0.1781,  0.0146, -0.0277,
         0.0101, -0.0311,  0.0172,  0.8606, -0.0131,  0.0082, -0.0194, -0.0215,
         0.2327, -0.0022, -0.0092,  0.4498,  0.2470, -0.0629,  0.0948,  0.2478,
         0.0413,  0.0431,  0.1562,  0.0202,  0.2662,  0.0407,  0.2470,  0.1366,
         0.4498,  0.0188, -0.0022,  0.2720,  0.2470,  0.3849, -0.0019,  0.0948,
         0.0520, -0.0277,  0.8606,  0.3214, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022])
arg max of tensor([ 0.0274,  0.0177, -0.0144,  

Tokens embeddings:
tensor([ 0.0274,  0.0177, -0.0144,  0.8606, -0.0119, -0.0250, -0.0022, -0.0277,
         0.0101,  0.2470, -0.0021,  0.5664, -0.0202, -0.0022,  0.0398,  0.0948,
         0.2977, -0.5207,  0.0886, -0.0019,  0.0985,  0.1781,  0.0146, -0.0277,
         0.0101, -0.0311,  0.0172,  0.8606, -0.0131,  0.0082, -0.0194, -0.0215,
         0.2327, -0.0022, -0.0092,  0.4498,  0.2470, -0.0629,  0.0948,  0.2478,
         0.0413,  0.0431,  0.1562,  0.0202,  0.2662,  0.0407,  0.2470,  0.1366,
         0.4498,  0.0188, -0.0022,  0.2720,  0.2470,  0.3849, -0.0019,  0.0948,
         0.0520, -0.0277,  0.8606,  0.3214, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022])
arg max of tensor([ 0.0274,  0.0177, -0.0144,  

Tokens embeddings:
tensor([ 0.0274,  0.0177, -0.0144,  0.8606, -0.0119, -0.0250, -0.0022, -0.0277,
         0.0101,  0.2470, -0.0021,  0.5664, -0.0202, -0.0022,  0.0398,  0.0948,
         0.2977, -0.5207,  0.0886, -0.0019,  0.0985,  0.1781,  0.0146, -0.0277,
         0.0101, -0.0311,  0.0172,  0.8606, -0.0131,  0.0082, -0.0194, -0.0215,
         0.2327, -0.0022, -0.0092,  0.4498,  0.2470, -0.0629,  0.0948,  0.2478,
         0.0413,  0.0431,  0.1562,  0.0202,  0.2662,  0.0407,  0.2470,  0.1366,
         0.4498,  0.0188, -0.0022,  0.2720,  0.2470,  0.3849, -0.0019,  0.0948,
         0.0520, -0.0277,  0.8606,  0.3214, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022])
arg max of tensor([ 0.0274,  0.0177, -0.0144,  

Tokens embeddings:
tensor([ 0.0274,  0.0177, -0.0144,  0.8606, -0.0119, -0.0250, -0.0022, -0.0277,
         0.0101,  0.2470, -0.0021,  0.5664, -0.0202, -0.0022,  0.0398,  0.0948,
         0.2977, -0.5207,  0.0886, -0.0019,  0.0985,  0.1781,  0.0146, -0.0277,
         0.0101, -0.0311,  0.0172,  0.8606, -0.0131,  0.0082, -0.0194, -0.0215,
         0.2327, -0.0022, -0.0092,  0.4498,  0.2470, -0.0629,  0.0948,  0.2478,
         0.0413,  0.0431,  0.1562,  0.0202,  0.2662,  0.0407,  0.2470,  0.1366,
         0.4498,  0.0188, -0.0022,  0.2720,  0.2470,  0.3849, -0.0019,  0.0948,
         0.0520, -0.0277,  0.8606,  0.3214, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022])
arg max of tensor([ 0.0274,  0.0177, -0.0144,  

Tokens embeddings:
tensor([ 0.0274,  0.0177, -0.0144,  0.8606, -0.0119, -0.0250, -0.0022, -0.0277,
         0.0101,  0.2470, -0.0021,  0.5664, -0.0202, -0.0022,  0.0398,  0.0948,
         0.2977, -0.5207,  0.0886, -0.0019,  0.0985,  0.1781,  0.0146, -0.0277,
         0.0101, -0.0311,  0.0172,  0.8606, -0.0131,  0.0082, -0.0194, -0.0215,
         0.2327, -0.0022, -0.0092,  0.4498,  0.2470, -0.0629,  0.0948,  0.2478,
         0.0413,  0.0431,  0.1562,  0.0202,  0.2662,  0.0407,  0.2470,  0.1366,
         0.4498,  0.0188, -0.0022,  0.2720,  0.2470,  0.3849, -0.0019,  0.0948,
         0.0520, -0.0277,  0.8606,  0.3214, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022])
arg max of tensor([ 0.0274,  0.0177, -0.0144,  

Tokens embeddings:
tensor([ 0.0274,  0.0177, -0.0144,  0.8606, -0.0119, -0.0250, -0.0022, -0.0277,
         0.0101,  0.2470, -0.0021,  0.5664, -0.0202, -0.0022,  0.0398,  0.0948,
         0.2977, -0.5207,  0.0886, -0.0019,  0.0985,  0.1781,  0.0146, -0.0277,
         0.0101, -0.0311,  0.0172,  0.8606, -0.0131,  0.0082, -0.0194, -0.0215,
         0.2327, -0.0022, -0.0092,  0.4498,  0.2470, -0.0629,  0.0948,  0.2478,
         0.0413,  0.0431,  0.1562,  0.0202,  0.2662,  0.0407,  0.2470,  0.1366,
         0.4498,  0.0188, -0.0022,  0.2720,  0.2470,  0.3849, -0.0019,  0.0948,
         0.0520, -0.0277,  0.8606,  0.3214, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022])
arg max of tensor([ 0.0274,  0.0177, -0.0144,  

Tokens embeddings:
tensor([ 0.0274,  0.0177, -0.0144,  0.8606, -0.0119, -0.0250, -0.0022, -0.0277,
         0.0101,  0.2470, -0.0021,  0.5664, -0.0202, -0.0022,  0.0398,  0.0948,
         0.2977, -0.5207,  0.0886, -0.0019,  0.0985,  0.1781,  0.0146, -0.0277,
         0.0101, -0.0311,  0.0172,  0.8606, -0.0131,  0.0082, -0.0194, -0.0215,
         0.2327, -0.0022, -0.0092,  0.4498,  0.2470, -0.0629,  0.0948,  0.2478,
         0.0413,  0.0431,  0.1562,  0.0202,  0.2662,  0.0407,  0.2470,  0.1366,
         0.4498,  0.0188, -0.0022,  0.2720,  0.2470,  0.3849, -0.0019,  0.0948,
         0.0520, -0.0277,  0.8606,  0.3214, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022])
arg max of tensor([ 0.0274,  0.0177, -0.0144,  

Tokens embeddings:
tensor([ 0.0274,  0.0177, -0.0144,  0.8606, -0.0119, -0.0250, -0.0022, -0.0277,
         0.0101,  0.2470, -0.0021,  0.5664, -0.0202, -0.0022,  0.0398,  0.0948,
         0.2977, -0.5207,  0.0886, -0.0019,  0.0985,  0.1781,  0.0146, -0.0277,
         0.0101, -0.0311,  0.0172,  0.8606, -0.0131,  0.0082, -0.0194, -0.0215,
         0.2327, -0.0022, -0.0092,  0.4498,  0.2470, -0.0629,  0.0948,  0.2478,
         0.0413,  0.0431,  0.1562,  0.0202,  0.2662,  0.0407,  0.2470,  0.1366,
         0.4498,  0.0188, -0.0022,  0.2720,  0.2470,  0.3849, -0.0019,  0.0948,
         0.0520, -0.0277,  0.8606,  0.3214, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022])
arg max of tensor([ 0.0274,  0.0177, -0.0144,  

Tokens embeddings:
tensor([ 0.0274,  0.0177, -0.0144,  0.8606, -0.0119, -0.0250, -0.0022, -0.0277,
         0.0101,  0.2470, -0.0021,  0.5664, -0.0202, -0.0022,  0.0398,  0.0948,
         0.2977, -0.5207,  0.0886, -0.0019,  0.0985,  0.1781,  0.0146, -0.0277,
         0.0101, -0.0311,  0.0172,  0.8606, -0.0131,  0.0082, -0.0194, -0.0215,
         0.2327, -0.0022, -0.0092,  0.4498,  0.2470, -0.0629,  0.0948,  0.2478,
         0.0413,  0.0431,  0.1562,  0.0202,  0.2662,  0.0407,  0.2470,  0.1366,
         0.4498,  0.0188, -0.0022,  0.2720,  0.2470,  0.3849, -0.0019,  0.0948,
         0.0520, -0.0277,  0.8606,  0.3214, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022])
arg max of tensor([ 0.0274,  0.0177, -0.0144,  

Tokens embeddings:
tensor([ 0.0274, -0.1941,  0.4287,  0.3056, -0.0079,  0.6855, -0.0277, -0.1133,
         0.3100,  0.0275,  0.1348,  0.1708, -0.0092, -0.0014, -0.0022, -0.0277,
         0.8606,  0.3214, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022])
arg max of tensor([ 0.0274, -0.1941,  0.4287,  

Tokens embeddings:
tensor([ 0.0274, -0.1941,  0.4287,  0.3056, -0.0079,  0.6855, -0.0277, -0.1133,
         0.3100,  0.0275,  0.1348,  0.1708, -0.0092, -0.0014, -0.0022, -0.0277,
         0.8606,  0.3214, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022])
arg max of tensor([ 0.0274, -0.1941,  0.4287,  

Tokens embeddings:
tensor([ 0.0274, -0.1941,  0.4287,  0.3056, -0.0079,  0.6855, -0.0277, -0.1133,
         0.3100,  0.0275,  0.1348,  0.1708, -0.0092, -0.0014, -0.0022, -0.0277,
         0.8606,  0.3214, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022])
arg max of tensor([ 0.0274, -0.1941,  0.4287,  

Tokens embeddings:
tensor([ 0.0274, -0.1941,  0.4287,  0.3056, -0.0079,  0.6855, -0.0277, -0.1133,
         0.3100,  0.0275,  0.1348,  0.1708, -0.0092, -0.0014, -0.0022, -0.0277,
         0.8606,  0.3214, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022])
arg max of tensor([ 0.0274, -0.1941,  0.4287,  

Tokens embeddings:
tensor([ 0.0274, -0.1941,  0.4287,  0.3056, -0.0079,  0.6855, -0.0277, -0.1133,
         0.3100,  0.0275,  0.1348,  0.1708, -0.0092, -0.0014, -0.0022, -0.0277,
         0.8606,  0.3214, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022])
arg max of tensor([ 0.0274, -0.1941,  0.4287,  

Tokens embeddings:
tensor([ 0.0274, -0.1941,  0.4287,  0.3056, -0.0079,  0.6855, -0.0277, -0.1133,
         0.3100,  0.0275,  0.1348,  0.1708, -0.0092, -0.0014, -0.0022, -0.0277,
         0.8606,  0.3214, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022])
arg max of tensor([ 0.0274, -0.1941,  0.4287,  

Tokens embeddings:
tensor([ 0.0274, -0.1941,  0.4287,  0.3056, -0.0079,  0.6855, -0.0277, -0.1133,
         0.3100,  0.0275,  0.1348,  0.1708, -0.0092, -0.0014, -0.0022, -0.0277,
         0.8606,  0.3214, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022])
arg max of tensor([ 0.0274, -0.1941,  0.4287,  

Tokens embeddings:
tensor([ 0.0274, -0.1941,  0.4287,  0.3056, -0.0079,  0.6855, -0.0277, -0.1133,
         0.3100,  0.0275,  0.1348,  0.1708, -0.0092, -0.0014, -0.0022, -0.0277,
         0.8606,  0.3214, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022])
arg max of tensor([ 0.0274, -0.1941,  0.4287,  

Tokens embeddings:
tensor([ 0.0274, -0.1941,  0.4287,  0.3056, -0.0079,  0.6855, -0.0277, -0.1133,
         0.3100,  0.0275,  0.1348,  0.1708, -0.0092, -0.0014, -0.0022, -0.0277,
         0.8606,  0.3214, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022])
arg max of tensor([ 0.0274, -0.1941,  0.4287,  

Tokens embeddings:
tensor([ 0.0274, -0.1941,  0.4287,  0.3056, -0.0079,  0.6855, -0.0277, -0.1133,
         0.3100,  0.0275,  0.1348,  0.1708, -0.0092, -0.0014, -0.0022, -0.0277,
         0.8606,  0.3214, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022])
arg max of tensor([ 0.0274, -0.1941,  0.4287,  

Tokens embeddings:
tensor([ 0.0274, -0.1941,  0.4287,  0.3056, -0.0079,  0.6855, -0.0277, -0.1133,
         0.3100,  0.0275,  0.1348,  0.1708, -0.0092, -0.0014, -0.0022, -0.0277,
         0.8606,  0.3214, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022])
arg max of tensor([ 0.0274, -0.1941,  0.4287,  

Tokens embeddings:
tensor([ 0.0274, -0.1941,  0.4287,  0.3056, -0.0079,  0.6855, -0.0277, -0.1133,
         0.3100,  0.0275,  0.1348,  0.1708, -0.0092, -0.0014, -0.0022, -0.0277,
         0.8606,  0.3214, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022])
arg max of tensor([ 0.0274, -0.1941,  0.4287,  

Tokens embeddings:
tensor([ 0.0274,  0.0182, -0.0022, -0.0056, -0.0212, -0.0202, -0.0215,  0.2977,
         0.1299, -0.0754, -0.0111, -0.0099,  0.1348,  0.0068,  0.1534,  0.1294,
         0.1728, -0.0098, -0.0022, -0.0277,  0.8606,  0.3214, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022])
arg max of tensor([ 0.0274,  0.0182, -0.0022, -

Tokens embeddings:
tensor([ 0.0274,  0.0182, -0.0022, -0.0056, -0.0212, -0.0202, -0.0215,  0.2977,
         0.1299, -0.0754, -0.0111, -0.0099,  0.1348,  0.0068,  0.1534,  0.1294,
         0.1728, -0.0098, -0.0022, -0.0277,  0.8606,  0.3214, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022])
arg max of tensor([ 0.0274,  0.0182, -0.0022, -

Tokens embeddings:
tensor([ 0.0274,  0.0182, -0.0022, -0.0056, -0.0212, -0.0202, -0.0215,  0.2977,
         0.1299, -0.0754, -0.0111, -0.0099,  0.1348,  0.0068,  0.1534,  0.1294,
         0.1728, -0.0098, -0.0022, -0.0277,  0.8606,  0.3214, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022])
arg max of tensor([ 0.0274,  0.0182, -0.0022, -

Tokens embeddings:
tensor([ 0.0274,  0.0182, -0.0022, -0.0056, -0.0212, -0.0202, -0.0215,  0.2977,
         0.1299, -0.0754, -0.0111, -0.0099,  0.1348,  0.0068,  0.1534,  0.1294,
         0.1728, -0.0098, -0.0022, -0.0277,  0.8606,  0.3214, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022])
arg max of tensor([ 0.0274,  0.0182, -0.0022, -

Tokens embeddings:
tensor([ 0.0274,  0.0182, -0.0022, -0.0056, -0.0212, -0.0202, -0.0215,  0.2977,
         0.1299, -0.0754, -0.0111, -0.0099,  0.1348,  0.0068,  0.1534,  0.1294,
         0.1728, -0.0098, -0.0022, -0.0277,  0.8606,  0.3214, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022])
arg max of tensor([ 0.0274,  0.0182, -0.0022, -

Tokens embeddings:
tensor([ 0.0274,  0.0182, -0.0022, -0.0056, -0.0212, -0.0202, -0.0215,  0.2977,
         0.1299, -0.0754, -0.0111, -0.0099,  0.1348,  0.0068,  0.1534,  0.1294,
         0.1728, -0.0098, -0.0022, -0.0277,  0.8606,  0.3214, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022])
arg max of tensor([ 0.0274,  0.0182, -0.0022, -

Tokens embeddings:
tensor([ 0.0274,  0.0182, -0.0022, -0.0056, -0.0212, -0.0202, -0.0215,  0.2977,
         0.1299, -0.0754, -0.0111, -0.0099,  0.1348,  0.0068,  0.1534,  0.1294,
         0.1728, -0.0098, -0.0022, -0.0277,  0.8606,  0.3214, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022])
arg max of tensor([ 0.0274,  0.0182, -0.0022, -

Tokens embeddings:
tensor([ 0.0274,  0.0182, -0.0022, -0.0056, -0.0212, -0.0202, -0.0215,  0.2977,
         0.1299, -0.0754, -0.0111, -0.0099,  0.1348,  0.0068,  0.1534,  0.1294,
         0.1728, -0.0098, -0.0022, -0.0277,  0.8606,  0.3214, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022])
arg max of tensor([ 0.0274,  0.0182, -0.0022, -

Tokens embeddings:
tensor([ 0.0274,  0.0182, -0.0022, -0.0056, -0.0212, -0.0202, -0.0215,  0.2977,
         0.1299, -0.0754, -0.0111, -0.0099,  0.1348,  0.0068,  0.1534,  0.1294,
         0.1728, -0.0098, -0.0022, -0.0277,  0.8606,  0.3214, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022])
arg max of tensor([ 0.0274,  0.0182, -0.0022, -

Tokens embeddings:
tensor([ 0.0274,  0.0182, -0.0022, -0.0056, -0.0212, -0.0202, -0.0215,  0.2977,
         0.1299, -0.0754, -0.0111, -0.0099,  0.1348,  0.0068,  0.1534,  0.1294,
         0.1728, -0.0098, -0.0022, -0.0277,  0.8606,  0.3214, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022])
arg max of tensor([ 0.0274,  0.0182, -0.0022, -

Tokens embeddings:
tensor([ 0.0274,  0.0182, -0.0022, -0.0056, -0.0212, -0.0202, -0.0215,  0.2977,
         0.1299, -0.0754, -0.0111, -0.0099,  0.1348,  0.0068,  0.1534,  0.1294,
         0.1728, -0.0098, -0.0022, -0.0277,  0.8606,  0.3214, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022])
arg max of tensor([ 0.0274,  0.0182, -0.0022, -

Tokens embeddings:
tensor([ 0.0274,  0.0182, -0.0022, -0.0056, -0.0212, -0.0202, -0.0215,  0.2977,
         0.1299, -0.0754, -0.0111, -0.0099,  0.1348,  0.0068,  0.1534,  0.1294,
         0.1728, -0.0098, -0.0022, -0.0277,  0.8606,  0.3214, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022])
arg max of tensor([ 0.0274,  0.0182, -0.0022, -

Tokens embeddings:
tensor([ 0.0274,  0.0824,  0.0172,  0.8606,  0.0253, -0.0164,  0.1011,  0.0098,
         0.1705,  0.0234,  0.2478,  0.0561, -0.0277,  0.0101,  0.0056,  0.0684,
         1.0145, -0.0079,  0.3957, -0.0202, -0.0022,  0.0159, -0.0022,  0.0263,
         0.0250,  0.4498,  0.2977, -0.0085, -0.0277,  0.8606,  0.3214, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022])
arg max of tensor([ 0.0274,  0.0824,  0.0172,  

Tokens embeddings:
tensor([ 0.0274,  0.0824,  0.0172,  0.8606,  0.0253, -0.0164,  0.1011,  0.0098,
         0.1705,  0.0234,  0.2478,  0.0561, -0.0277,  0.0101,  0.0056,  0.0684,
         1.0145, -0.0079,  0.3957, -0.0202, -0.0022,  0.0159, -0.0022,  0.0263,
         0.0250,  0.4498,  0.2977, -0.0085, -0.0277,  0.8606,  0.3214, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022])
arg max of tensor([ 0.0274,  0.0824,  0.0172,  

Tokens embeddings:
tensor([ 0.0274,  0.0824,  0.0172,  0.8606,  0.0253, -0.0164,  0.1011,  0.0098,
         0.1705,  0.0234,  0.2478,  0.0561, -0.0277,  0.0101,  0.0056,  0.0684,
         1.0145, -0.0079,  0.3957, -0.0202, -0.0022,  0.0159, -0.0022,  0.0263,
         0.0250,  0.4498,  0.2977, -0.0085, -0.0277,  0.8606,  0.3214, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022])
arg max of tensor([ 0.0274,  0.0824,  0.0172,  

Tokens embeddings:
tensor([ 0.0274,  0.0824,  0.0172,  0.8606,  0.0253, -0.0164,  0.1011,  0.0098,
         0.1705,  0.0234,  0.2478,  0.0561, -0.0277,  0.0101,  0.0056,  0.0684,
         1.0145, -0.0079,  0.3957, -0.0202, -0.0022,  0.0159, -0.0022,  0.0263,
         0.0250,  0.4498,  0.2977, -0.0085, -0.0277,  0.8606,  0.3214, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022])
arg max of tensor([ 0.0274,  0.0824,  0.0172,  

Tokens embeddings:
tensor([ 0.0274,  0.0824,  0.0172,  0.8606,  0.0253, -0.0164,  0.1011,  0.0098,
         0.1705,  0.0234,  0.2478,  0.0561, -0.0277,  0.0101,  0.0056,  0.0684,
         1.0145, -0.0079,  0.3957, -0.0202, -0.0022,  0.0159, -0.0022,  0.0263,
         0.0250,  0.4498,  0.2977, -0.0085, -0.0277,  0.8606,  0.3214, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022])
arg max of tensor([ 0.0274,  0.0824,  0.0172,  

Tokens embeddings:
tensor([ 0.0274,  0.0824,  0.0172,  0.8606,  0.0253, -0.0164,  0.1011,  0.0098,
         0.1705,  0.0234,  0.2478,  0.0561, -0.0277,  0.0101,  0.0056,  0.0684,
         1.0145, -0.0079,  0.3957, -0.0202, -0.0022,  0.0159, -0.0022,  0.0263,
         0.0250,  0.4498,  0.2977, -0.0085, -0.0277,  0.8606,  0.3214, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022])
arg max of tensor([ 0.0274,  0.0824,  0.0172,  

Tokens embeddings:
tensor([ 0.0274,  0.0824,  0.0172,  0.8606,  0.0253, -0.0164,  0.1011,  0.0098,
         0.1705,  0.0234,  0.2478,  0.0561, -0.0277,  0.0101,  0.0056,  0.0684,
         1.0145, -0.0079,  0.3957, -0.0202, -0.0022,  0.0159, -0.0022,  0.0263,
         0.0250,  0.4498,  0.2977, -0.0085, -0.0277,  0.8606,  0.3214, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022])
arg max of tensor([ 0.0274,  0.0824,  0.0172,  

Tokens embeddings:
tensor([ 0.0274,  0.0824,  0.0172,  0.8606,  0.0253, -0.0164,  0.1011,  0.0098,
         0.1705,  0.0234,  0.2478,  0.0561, -0.0277,  0.0101,  0.0056,  0.0684,
         1.0145, -0.0079,  0.3957, -0.0202, -0.0022,  0.0159, -0.0022,  0.0263,
         0.0250,  0.4498,  0.2977, -0.0085, -0.0277,  0.8606,  0.3214, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022])
arg max of tensor([ 0.0274,  0.0824,  0.0172,  

Tokens embeddings:
tensor([ 0.0274,  0.0824,  0.0172,  0.8606,  0.0253, -0.0164,  0.1011,  0.0098,
         0.1705,  0.0234,  0.2478,  0.0561, -0.0277,  0.0101,  0.0056,  0.0684,
         1.0145, -0.0079,  0.3957, -0.0202, -0.0022,  0.0159, -0.0022,  0.0263,
         0.0250,  0.4498,  0.2977, -0.0085, -0.0277,  0.8606,  0.3214, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022])
arg max of tensor([ 0.0274,  0.0824,  0.0172,  

Tokens embeddings:
tensor([ 0.0274,  0.0824,  0.0172,  0.8606,  0.0253, -0.0164,  0.1011,  0.0098,
         0.1705,  0.0234,  0.2478,  0.0561, -0.0277,  0.0101,  0.0056,  0.0684,
         1.0145, -0.0079,  0.3957, -0.0202, -0.0022,  0.0159, -0.0022,  0.0263,
         0.0250,  0.4498,  0.2977, -0.0085, -0.0277,  0.8606,  0.3214, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022])
arg max of tensor([ 0.0274,  0.0824,  0.0172,  

Tokens embeddings:
tensor([ 0.0274,  0.0824,  0.0172,  0.8606,  0.0253, -0.0164,  0.1011,  0.0098,
         0.1705,  0.0234,  0.2478,  0.0561, -0.0277,  0.0101,  0.0056,  0.0684,
         1.0145, -0.0079,  0.3957, -0.0202, -0.0022,  0.0159, -0.0022,  0.0263,
         0.0250,  0.4498,  0.2977, -0.0085, -0.0277,  0.8606,  0.3214, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022])
arg max of tensor([ 0.0274,  0.0824,  0.0172,  

Tokens embeddings:
tensor([ 0.0274,  0.0824,  0.0172,  0.8606,  0.0253, -0.0164,  0.1011,  0.0098,
         0.1705,  0.0234,  0.2478,  0.0561, -0.0277,  0.0101,  0.0056,  0.0684,
         1.0145, -0.0079,  0.3957, -0.0202, -0.0022,  0.0159, -0.0022,  0.0263,
         0.0250,  0.4498,  0.2977, -0.0085, -0.0277,  0.8606,  0.3214, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022])
arg max of tensor([ 0.0274,  0.0824,  0.0172,  

Tokens embeddings:
tensor([ 0.0274,  0.0172,  0.8606,  0.0484,  0.0042,  0.8046, -0.0311,  0.2977,
        -0.0198,  0.4093,  0.1202,  0.1368,  0.1021,  0.0066,  0.2662, -0.0522,
         0.3056, -0.0546,  0.0444,  1.4744, -0.0277,  0.0194,  0.0303, -0.0148,
         0.9803,  0.6025, -0.0590,  0.3285, -0.0086, -0.0198,  0.4093,  0.1294,
         0.2977,  0.0897,  0.4498,  0.0074,  0.2977,  0.1021, -0.0022,  0.4133,
         0.3115,  0.0948,  0.2562, -0.0277,  0.0101, -0.0198,  0.1562, -0.0311,
        -0.0277,  0.8606,  0.3214, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022])
arg max of tensor([ 0.0274,  0.0172,  0.8606,  

Tokens embeddings:
tensor([ 0.0274,  0.0172,  0.8606,  0.0484,  0.0042,  0.8046, -0.0311,  0.2977,
        -0.0198,  0.4093,  0.1202,  0.1368,  0.1021,  0.0066,  0.2662, -0.0522,
         0.3056, -0.0546,  0.0444,  1.4744, -0.0277,  0.0194,  0.0303, -0.0148,
         0.9803,  0.6025, -0.0590,  0.3285, -0.0086, -0.0198,  0.4093,  0.1294,
         0.2977,  0.0897,  0.4498,  0.0074,  0.2977,  0.1021, -0.0022,  0.4133,
         0.3115,  0.0948,  0.2562, -0.0277,  0.0101, -0.0198,  0.1562, -0.0311,
        -0.0277,  0.8606,  0.3214, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022])
arg max of tensor([ 0.0274,  0.0172,  0.8606,  

Tokens embeddings:
tensor([ 0.0274,  0.0172,  0.8606,  0.0484,  0.0042,  0.8046, -0.0311,  0.2977,
        -0.0198,  0.4093,  0.1202,  0.1368,  0.1021,  0.0066,  0.2662, -0.0522,
         0.3056, -0.0546,  0.0444,  1.4744, -0.0277,  0.0194,  0.0303, -0.0148,
         0.9803,  0.6025, -0.0590,  0.3285, -0.0086, -0.0198,  0.4093,  0.1294,
         0.2977,  0.0897,  0.4498,  0.0074,  0.2977,  0.1021, -0.0022,  0.4133,
         0.3115,  0.0948,  0.2562, -0.0277,  0.0101, -0.0198,  0.1562, -0.0311,
        -0.0277,  0.8606,  0.3214, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022])
arg max of tensor([ 0.0274,  0.0172,  0.8606,  

Tokens embeddings:
tensor([ 0.0274,  0.0172,  0.8606,  0.0484,  0.0042,  0.8046, -0.0311,  0.2977,
        -0.0198,  0.4093,  0.1202,  0.1368,  0.1021,  0.0066,  0.2662, -0.0522,
         0.3056, -0.0546,  0.0444,  1.4744, -0.0277,  0.0194,  0.0303, -0.0148,
         0.9803,  0.6025, -0.0590,  0.3285, -0.0086, -0.0198,  0.4093,  0.1294,
         0.2977,  0.0897,  0.4498,  0.0074,  0.2977,  0.1021, -0.0022,  0.4133,
         0.3115,  0.0948,  0.2562, -0.0277,  0.0101, -0.0198,  0.1562, -0.0311,
        -0.0277,  0.8606,  0.3214, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022])
arg max of tensor([ 0.0274,  0.0172,  0.8606,  

Tokens embeddings:
tensor([ 0.0274,  0.0172,  0.8606,  0.0484,  0.0042,  0.8046, -0.0311,  0.2977,
        -0.0198,  0.4093,  0.1202,  0.1368,  0.1021,  0.0066,  0.2662, -0.0522,
         0.3056, -0.0546,  0.0444,  1.4744, -0.0277,  0.0194,  0.0303, -0.0148,
         0.9803,  0.6025, -0.0590,  0.3285, -0.0086, -0.0198,  0.4093,  0.1294,
         0.2977,  0.0897,  0.4498,  0.0074,  0.2977,  0.1021, -0.0022,  0.4133,
         0.3115,  0.0948,  0.2562, -0.0277,  0.0101, -0.0198,  0.1562, -0.0311,
        -0.0277,  0.8606,  0.3214, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022])
arg max of tensor([ 0.0274,  0.0172,  0.8606,  

Tokens embeddings:
tensor([ 0.0274,  0.0172,  0.8606,  0.0484,  0.0042,  0.8046, -0.0311,  0.2977,
        -0.0198,  0.4093,  0.1202,  0.1368,  0.1021,  0.0066,  0.2662, -0.0522,
         0.3056, -0.0546,  0.0444,  1.4744, -0.0277,  0.0194,  0.0303, -0.0148,
         0.9803,  0.6025, -0.0590,  0.3285, -0.0086, -0.0198,  0.4093,  0.1294,
         0.2977,  0.0897,  0.4498,  0.0074,  0.2977,  0.1021, -0.0022,  0.4133,
         0.3115,  0.0948,  0.2562, -0.0277,  0.0101, -0.0198,  0.1562, -0.0311,
        -0.0277,  0.8606,  0.3214, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022])
arg max of tensor([ 0.0274,  0.0172,  0.8606,  

Tokens embeddings:
tensor([ 0.0274,  0.0172,  0.8606,  0.0484,  0.0042,  0.8046, -0.0311,  0.2977,
        -0.0198,  0.4093,  0.1202,  0.1368,  0.1021,  0.0066,  0.2662, -0.0522,
         0.3056, -0.0546,  0.0444,  1.4744, -0.0277,  0.0194,  0.0303, -0.0148,
         0.9803,  0.6025, -0.0590,  0.3285, -0.0086, -0.0198,  0.4093,  0.1294,
         0.2977,  0.0897,  0.4498,  0.0074,  0.2977,  0.1021, -0.0022,  0.4133,
         0.3115,  0.0948,  0.2562, -0.0277,  0.0101, -0.0198,  0.1562, -0.0311,
        -0.0277,  0.8606,  0.3214, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022])
arg max of tensor([ 0.0274,  0.0172,  0.8606,  

Tokens embeddings:
tensor([ 0.0274,  0.0172,  0.8606,  0.0484,  0.0042,  0.8046, -0.0311,  0.2977,
        -0.0198,  0.4093,  0.1202,  0.1368,  0.1021,  0.0066,  0.2662, -0.0522,
         0.3056, -0.0546,  0.0444,  1.4744, -0.0277,  0.0194,  0.0303, -0.0148,
         0.9803,  0.6025, -0.0590,  0.3285, -0.0086, -0.0198,  0.4093,  0.1294,
         0.2977,  0.0897,  0.4498,  0.0074,  0.2977,  0.1021, -0.0022,  0.4133,
         0.3115,  0.0948,  0.2562, -0.0277,  0.0101, -0.0198,  0.1562, -0.0311,
        -0.0277,  0.8606,  0.3214, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022])
arg max of tensor([ 0.0274,  0.0172,  0.8606,  

Tokens embeddings:
tensor([ 0.0274,  0.0172,  0.8606,  0.0484,  0.0042,  0.8046, -0.0311,  0.2977,
        -0.0198,  0.4093,  0.1202,  0.1368,  0.1021,  0.0066,  0.2662, -0.0522,
         0.3056, -0.0546,  0.0444,  1.4744, -0.0277,  0.0194,  0.0303, -0.0148,
         0.9803,  0.6025, -0.0590,  0.3285, -0.0086, -0.0198,  0.4093,  0.1294,
         0.2977,  0.0897,  0.4498,  0.0074,  0.2977,  0.1021, -0.0022,  0.4133,
         0.3115,  0.0948,  0.2562, -0.0277,  0.0101, -0.0198,  0.1562, -0.0311,
        -0.0277,  0.8606,  0.3214, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022])
arg max of tensor([ 0.0274,  0.0172,  0.8606,  

Tokens embeddings:
tensor([ 0.0274,  0.0172,  0.8606,  0.0484,  0.0042,  0.8046, -0.0311,  0.2977,
        -0.0198,  0.4093,  0.1202,  0.1368,  0.1021,  0.0066,  0.2662, -0.0522,
         0.3056, -0.0546,  0.0444,  1.4744, -0.0277,  0.0194,  0.0303, -0.0148,
         0.9803,  0.6025, -0.0590,  0.3285, -0.0086, -0.0198,  0.4093,  0.1294,
         0.2977,  0.0897,  0.4498,  0.0074,  0.2977,  0.1021, -0.0022,  0.4133,
         0.3115,  0.0948,  0.2562, -0.0277,  0.0101, -0.0198,  0.1562, -0.0311,
        -0.0277,  0.8606,  0.3214, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022])
arg max of tensor([ 0.0274,  0.0172,  0.8606,  

Tokens embeddings:
tensor([ 0.0274,  0.0172,  0.8606,  0.0484,  0.0042,  0.8046, -0.0311,  0.2977,
        -0.0198,  0.4093,  0.1202,  0.1368,  0.1021,  0.0066,  0.2662, -0.0522,
         0.3056, -0.0546,  0.0444,  1.4744, -0.0277,  0.0194,  0.0303, -0.0148,
         0.9803,  0.6025, -0.0590,  0.3285, -0.0086, -0.0198,  0.4093,  0.1294,
         0.2977,  0.0897,  0.4498,  0.0074,  0.2977,  0.1021, -0.0022,  0.4133,
         0.3115,  0.0948,  0.2562, -0.0277,  0.0101, -0.0198,  0.1562, -0.0311,
        -0.0277,  0.8606,  0.3214, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022])
arg max of tensor([ 0.0274,  0.0172,  0.8606,  

Tokens embeddings:
tensor([ 0.0274,  0.0172,  0.8606,  0.0484,  0.0042,  0.8046, -0.0311,  0.2977,
        -0.0198,  0.4093,  0.1202,  0.1368,  0.1021,  0.0066,  0.2662, -0.0522,
         0.3056, -0.0546,  0.0444,  1.4744, -0.0277,  0.0194,  0.0303, -0.0148,
         0.9803,  0.6025, -0.0590,  0.3285, -0.0086, -0.0198,  0.4093,  0.1294,
         0.2977,  0.0897,  0.4498,  0.0074,  0.2977,  0.1021, -0.0022,  0.4133,
         0.3115,  0.0948,  0.2562, -0.0277,  0.0101, -0.0198,  0.1562, -0.0311,
        -0.0277,  0.8606,  0.3214, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022])
arg max of tensor([ 0.0274,  0.0172,  0.8606,  

Tokens embeddings:
tensor([ 0.0274, -0.1941,  0.0576,  0.4237,  0.3695, -0.0022, -0.0277,  0.0101,
         0.3470,  0.2822, -0.0022,  1.1194,  0.0317, -0.0022, -0.0277,  0.0016,
         0.4257,  0.2574,  0.3216,  0.0015, -0.0277,  0.0101,  0.0185, -0.0025,
         0.0633,  0.2140,  0.1939,  0.1387,  0.4704, -0.0277,  0.0101,  0.0149,
         0.0332,  0.3716,  0.3184,  0.4704, -0.0411, -0.0277,  0.8606,  0.3214,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022])
arg max of tensor([ 0.0274, -0.1941,  0.0576,  

Tokens embeddings:
tensor([ 0.0274, -0.1941,  0.0576,  0.4237,  0.3695, -0.0022, -0.0277,  0.0101,
         0.3470,  0.2822, -0.0022,  1.1194,  0.0317, -0.0022, -0.0277,  0.0016,
         0.4257,  0.2574,  0.3216,  0.0015, -0.0277,  0.0101,  0.0185, -0.0025,
         0.0633,  0.2140,  0.1939,  0.1387,  0.4704, -0.0277,  0.0101,  0.0149,
         0.0332,  0.3716,  0.3184,  0.4704, -0.0411, -0.0277,  0.8606,  0.3214,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022])
arg max of tensor([ 0.0274, -0.1941,  0.0576,  

Tokens embeddings:
tensor([ 0.0274, -0.1941,  0.0576,  0.4237,  0.3695, -0.0022, -0.0277,  0.0101,
         0.3470,  0.2822, -0.0022,  1.1194,  0.0317, -0.0022, -0.0277,  0.0016,
         0.4257,  0.2574,  0.3216,  0.0015, -0.0277,  0.0101,  0.0185, -0.0025,
         0.0633,  0.2140,  0.1939,  0.1387,  0.4704, -0.0277,  0.0101,  0.0149,
         0.0332,  0.3716,  0.3184,  0.4704, -0.0411, -0.0277,  0.8606,  0.3214,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022])
arg max of tensor([ 0.0274, -0.1941,  0.0576,  

Tokens embeddings:
tensor([ 0.0274, -0.1941,  0.0576,  0.4237,  0.3695, -0.0022, -0.0277,  0.0101,
         0.3470,  0.2822, -0.0022,  1.1194,  0.0317, -0.0022, -0.0277,  0.0016,
         0.4257,  0.2574,  0.3216,  0.0015, -0.0277,  0.0101,  0.0185, -0.0025,
         0.0633,  0.2140,  0.1939,  0.1387,  0.4704, -0.0277,  0.0101,  0.0149,
         0.0332,  0.3716,  0.3184,  0.4704, -0.0411, -0.0277,  0.8606,  0.3214,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022])
arg max of tensor([ 0.0274, -0.1941,  0.0576,  

Tokens embeddings:
tensor([ 0.0274, -0.1941,  0.0576,  0.4237,  0.3695, -0.0022, -0.0277,  0.0101,
         0.3470,  0.2822, -0.0022,  1.1194,  0.0317, -0.0022, -0.0277,  0.0016,
         0.4257,  0.2574,  0.3216,  0.0015, -0.0277,  0.0101,  0.0185, -0.0025,
         0.0633,  0.2140,  0.1939,  0.1387,  0.4704, -0.0277,  0.0101,  0.0149,
         0.0332,  0.3716,  0.3184,  0.4704, -0.0411, -0.0277,  0.8606,  0.3214,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022])
arg max of tensor([ 0.0274, -0.1941,  0.0576,  

Tokens embeddings:
tensor([ 0.0274, -0.1941,  0.0576,  0.4237,  0.3695, -0.0022, -0.0277,  0.0101,
         0.3470,  0.2822, -0.0022,  1.1194,  0.0317, -0.0022, -0.0277,  0.0016,
         0.4257,  0.2574,  0.3216,  0.0015, -0.0277,  0.0101,  0.0185, -0.0025,
         0.0633,  0.2140,  0.1939,  0.1387,  0.4704, -0.0277,  0.0101,  0.0149,
         0.0332,  0.3716,  0.3184,  0.4704, -0.0411, -0.0277,  0.8606,  0.3214,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022])
arg max of tensor([ 0.0274, -0.1941,  0.0576,  

Tokens embeddings:
tensor([ 0.0274, -0.1941,  0.0576,  0.4237,  0.3695, -0.0022, -0.0277,  0.0101,
         0.3470,  0.2822, -0.0022,  1.1194,  0.0317, -0.0022, -0.0277,  0.0016,
         0.4257,  0.2574,  0.3216,  0.0015, -0.0277,  0.0101,  0.0185, -0.0025,
         0.0633,  0.2140,  0.1939,  0.1387,  0.4704, -0.0277,  0.0101,  0.0149,
         0.0332,  0.3716,  0.3184,  0.4704, -0.0411, -0.0277,  0.8606,  0.3214,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022])
arg max of tensor([ 0.0274, -0.1941,  0.0576,  

Tokens embeddings:
tensor([ 0.0274, -0.1941,  0.0576,  0.4237,  0.3695, -0.0022, -0.0277,  0.0101,
         0.3470,  0.2822, -0.0022,  1.1194,  0.0317, -0.0022, -0.0277,  0.0016,
         0.4257,  0.2574,  0.3216,  0.0015, -0.0277,  0.0101,  0.0185, -0.0025,
         0.0633,  0.2140,  0.1939,  0.1387,  0.4704, -0.0277,  0.0101,  0.0149,
         0.0332,  0.3716,  0.3184,  0.4704, -0.0411, -0.0277,  0.8606,  0.3214,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022])
arg max of tensor([ 0.0274, -0.1941,  0.0576,  

Tokens embeddings:
tensor([ 0.0274, -0.1941,  0.0576,  0.4237,  0.3695, -0.0022, -0.0277,  0.0101,
         0.3470,  0.2822, -0.0022,  1.1194,  0.0317, -0.0022, -0.0277,  0.0016,
         0.4257,  0.2574,  0.3216,  0.0015, -0.0277,  0.0101,  0.0185, -0.0025,
         0.0633,  0.2140,  0.1939,  0.1387,  0.4704, -0.0277,  0.0101,  0.0149,
         0.0332,  0.3716,  0.3184,  0.4704, -0.0411, -0.0277,  0.8606,  0.3214,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022])
arg max of tensor([ 0.0274, -0.1941,  0.0576,  

Tokens embeddings:
tensor([ 0.0274, -0.1941,  0.0576,  0.4237,  0.3695, -0.0022, -0.0277,  0.0101,
         0.3470,  0.2822, -0.0022,  1.1194,  0.0317, -0.0022, -0.0277,  0.0016,
         0.4257,  0.2574,  0.3216,  0.0015, -0.0277,  0.0101,  0.0185, -0.0025,
         0.0633,  0.2140,  0.1939,  0.1387,  0.4704, -0.0277,  0.0101,  0.0149,
         0.0332,  0.3716,  0.3184,  0.4704, -0.0411, -0.0277,  0.8606,  0.3214,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022])
arg max of tensor([ 0.0274, -0.1941,  0.0576,  

Tokens embeddings:
tensor([ 0.0274, -0.1941,  0.0576,  0.4237,  0.3695, -0.0022, -0.0277,  0.0101,
         0.3470,  0.2822, -0.0022,  1.1194,  0.0317, -0.0022, -0.0277,  0.0016,
         0.4257,  0.2574,  0.3216,  0.0015, -0.0277,  0.0101,  0.0185, -0.0025,
         0.0633,  0.2140,  0.1939,  0.1387,  0.4704, -0.0277,  0.0101,  0.0149,
         0.0332,  0.3716,  0.3184,  0.4704, -0.0411, -0.0277,  0.8606,  0.3214,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022])
arg max of tensor([ 0.0274, -0.1941,  0.0576,  

Tokens embeddings:
tensor([ 0.0274, -0.1941,  0.0576,  0.4237,  0.3695, -0.0022, -0.0277,  0.0101,
         0.3470,  0.2822, -0.0022,  1.1194,  0.0317, -0.0022, -0.0277,  0.0016,
         0.4257,  0.2574,  0.3216,  0.0015, -0.0277,  0.0101,  0.0185, -0.0025,
         0.0633,  0.2140,  0.1939,  0.1387,  0.4704, -0.0277,  0.0101,  0.0149,
         0.0332,  0.3716,  0.3184,  0.4704, -0.0411, -0.0277,  0.8606,  0.3214,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022])
arg max of tensor([ 0.0274, -0.1941,  0.0576,  

Tokens embeddings:
tensor([ 0.0274,  0.5736,  0.0665,  0.2911, -0.0277,  0.0066,  0.0073,  0.0434,
        -0.0022,  0.0332,  0.2977,  0.0266, -0.0277,  0.8606,  0.3214, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022])
arg max of tensor([ 0.0274,  0.5736,  0.0665,  

Tokens embeddings:
tensor([ 0.0274,  0.5736,  0.0665,  0.2911, -0.0277,  0.0066,  0.0073,  0.0434,
        -0.0022,  0.0332,  0.2977,  0.0266, -0.0277,  0.8606,  0.3214, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022])
arg max of tensor([ 0.0274,  0.5736,  0.0665,  

Tokens embeddings:
tensor([ 0.0274,  0.5736,  0.0665,  0.2911, -0.0277,  0.0066,  0.0073,  0.0434,
        -0.0022,  0.0332,  0.2977,  0.0266, -0.0277,  0.8606,  0.3214, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022])
arg max of tensor([ 0.0274,  0.5736,  0.0665,  

Tokens embeddings:
tensor([ 0.0274,  0.5736,  0.0665,  0.2911, -0.0277,  0.0066,  0.0073,  0.0434,
        -0.0022,  0.0332,  0.2977,  0.0266, -0.0277,  0.8606,  0.3214, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022])
arg max of tensor([ 0.0274,  0.5736,  0.0665,  

Tokens embeddings:
tensor([ 0.0274,  0.5736,  0.0665,  0.2911, -0.0277,  0.0066,  0.0073,  0.0434,
        -0.0022,  0.0332,  0.2977,  0.0266, -0.0277,  0.8606,  0.3214, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022])
arg max of tensor([ 0.0274,  0.5736,  0.0665,  

Tokens embeddings:
tensor([ 0.0274,  0.5736,  0.0665,  0.2911, -0.0277,  0.0066,  0.0073,  0.0434,
        -0.0022,  0.0332,  0.2977,  0.0266, -0.0277,  0.8606,  0.3214, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022])
arg max of tensor([ 0.0274,  0.5736,  0.0665,  

Tokens embeddings:
tensor([ 0.0274,  0.5736,  0.0665,  0.2911, -0.0277,  0.0066,  0.0073,  0.0434,
        -0.0022,  0.0332,  0.2977,  0.0266, -0.0277,  0.8606,  0.3214, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022])
arg max of tensor([ 0.0274,  0.5736,  0.0665,  

Tokens embeddings:
tensor([ 0.0274,  0.5736,  0.0665,  0.2911, -0.0277,  0.0066,  0.0073,  0.0434,
        -0.0022,  0.0332,  0.2977,  0.0266, -0.0277,  0.8606,  0.3214, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022])
arg max of tensor([ 0.0274,  0.5736,  0.0665,  

Tokens embeddings:
tensor([ 0.0274,  0.5736,  0.0665,  0.2911, -0.0277,  0.0066,  0.0073,  0.0434,
        -0.0022,  0.0332,  0.2977,  0.0266, -0.0277,  0.8606,  0.3214, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022])
arg max of tensor([ 0.0274,  0.5736,  0.0665,  

Tokens embeddings:
tensor([ 0.0274,  0.5736,  0.0665,  0.2911, -0.0277,  0.0066,  0.0073,  0.0434,
        -0.0022,  0.0332,  0.2977,  0.0266, -0.0277,  0.8606,  0.3214, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022])
arg max of tensor([ 0.0274,  0.5736,  0.0665,  

Tokens embeddings:
tensor([ 0.0274,  0.5736,  0.0665,  0.2911, -0.0277,  0.0066,  0.0073,  0.0434,
        -0.0022,  0.0332,  0.2977,  0.0266, -0.0277,  0.8606,  0.3214, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022])
arg max of tensor([ 0.0274,  0.5736,  0.0665,  

Tokens embeddings:
tensor([ 0.0274,  0.5736,  0.0665,  0.2911, -0.0277,  0.0066,  0.0073,  0.0434,
        -0.0022,  0.0332,  0.2977,  0.0266, -0.0277,  0.8606,  0.3214, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022])
arg max of tensor([ 0.0274,  0.5736,  0.0665,  

Tokens embeddings:
tensor([ 0.0274, -0.0546,  0.1734,  0.0843, -0.0276,  0.0948, -0.0229,  0.1686,
         0.1542, -0.0025,  0.1294,  0.2470,  0.1652, -0.0151,  0.6627,  0.3158,
        -0.0185,  0.0723,  0.0049,  0.0588,  0.2977, -0.0155,  0.0732,  0.6025,
         0.0779,  0.2562,  0.4237,  0.0149, -0.0277,  0.0101, -0.0198,  0.1562,
         0.1435, -0.0277,  0.8606,  0.3214, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022])
arg max of tensor([ 0.0274, -0.0546,  0.1734,  

Tokens embeddings:
tensor([ 0.0274, -0.0546,  0.1734,  0.0843, -0.0276,  0.0948, -0.0229,  0.1686,
         0.1542, -0.0025,  0.1294,  0.2470,  0.1652, -0.0151,  0.6627,  0.3158,
        -0.0185,  0.0723,  0.0049,  0.0588,  0.2977, -0.0155,  0.0732,  0.6025,
         0.0779,  0.2562,  0.4237,  0.0149, -0.0277,  0.0101, -0.0198,  0.1562,
         0.1435, -0.0277,  0.8606,  0.3214, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022])
arg max of tensor([ 0.0274, -0.0546,  0.1734,  

Tokens embeddings:
tensor([ 0.0274, -0.0546,  0.1734,  0.0843, -0.0276,  0.0948, -0.0229,  0.1686,
         0.1542, -0.0025,  0.1294,  0.2470,  0.1652, -0.0151,  0.6627,  0.3158,
        -0.0185,  0.0723,  0.0049,  0.0588,  0.2977, -0.0155,  0.0732,  0.6025,
         0.0779,  0.2562,  0.4237,  0.0149, -0.0277,  0.0101, -0.0198,  0.1562,
         0.1435, -0.0277,  0.8606,  0.3214, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022])
arg max of tensor([ 0.0274, -0.0546,  0.1734,  

Tokens embeddings:
tensor([ 0.0274, -0.0546,  0.1734,  0.0843, -0.0276,  0.0948, -0.0229,  0.1686,
         0.1542, -0.0025,  0.1294,  0.2470,  0.1652, -0.0151,  0.6627,  0.3158,
        -0.0185,  0.0723,  0.0049,  0.0588,  0.2977, -0.0155,  0.0732,  0.6025,
         0.0779,  0.2562,  0.4237,  0.0149, -0.0277,  0.0101, -0.0198,  0.1562,
         0.1435, -0.0277,  0.8606,  0.3214, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022])
arg max of tensor([ 0.0274, -0.0546,  0.1734,  

Tokens embeddings:
tensor([ 0.0274, -0.0546,  0.1734,  0.0843, -0.0276,  0.0948, -0.0229,  0.1686,
         0.1542, -0.0025,  0.1294,  0.2470,  0.1652, -0.0151,  0.6627,  0.3158,
        -0.0185,  0.0723,  0.0049,  0.0588,  0.2977, -0.0155,  0.0732,  0.6025,
         0.0779,  0.2562,  0.4237,  0.0149, -0.0277,  0.0101, -0.0198,  0.1562,
         0.1435, -0.0277,  0.8606,  0.3214, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022])
arg max of tensor([ 0.0274, -0.0546,  0.1734,  

Tokens embeddings:
tensor([ 0.0274, -0.0546,  0.1734,  0.0843, -0.0276,  0.0948, -0.0229,  0.1686,
         0.1542, -0.0025,  0.1294,  0.2470,  0.1652, -0.0151,  0.6627,  0.3158,
        -0.0185,  0.0723,  0.0049,  0.0588,  0.2977, -0.0155,  0.0732,  0.6025,
         0.0779,  0.2562,  0.4237,  0.0149, -0.0277,  0.0101, -0.0198,  0.1562,
         0.1435, -0.0277,  0.8606,  0.3214, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022])
arg max of tensor([ 0.0274, -0.0546,  0.1734,  

Tokens embeddings:
tensor([ 0.0274, -0.0546,  0.1734,  0.0843, -0.0276,  0.0948, -0.0229,  0.1686,
         0.1542, -0.0025,  0.1294,  0.2470,  0.1652, -0.0151,  0.6627,  0.3158,
        -0.0185,  0.0723,  0.0049,  0.0588,  0.2977, -0.0155,  0.0732,  0.6025,
         0.0779,  0.2562,  0.4237,  0.0149, -0.0277,  0.0101, -0.0198,  0.1562,
         0.1435, -0.0277,  0.8606,  0.3214, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022])
arg max of tensor([ 0.0274, -0.0546,  0.1734,  

Tokens embeddings:
tensor([ 0.0274, -0.0546,  0.1734,  0.0843, -0.0276,  0.0948, -0.0229,  0.1686,
         0.1542, -0.0025,  0.1294,  0.2470,  0.1652, -0.0151,  0.6627,  0.3158,
        -0.0185,  0.0723,  0.0049,  0.0588,  0.2977, -0.0155,  0.0732,  0.6025,
         0.0779,  0.2562,  0.4237,  0.0149, -0.0277,  0.0101, -0.0198,  0.1562,
         0.1435, -0.0277,  0.8606,  0.3214, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022])
arg max of tensor([ 0.0274, -0.0546,  0.1734,  

Tokens embeddings:
tensor([ 0.0274, -0.0546,  0.1734,  0.0843, -0.0276,  0.0948, -0.0229,  0.1686,
         0.1542, -0.0025,  0.1294,  0.2470,  0.1652, -0.0151,  0.6627,  0.3158,
        -0.0185,  0.0723,  0.0049,  0.0588,  0.2977, -0.0155,  0.0732,  0.6025,
         0.0779,  0.2562,  0.4237,  0.0149, -0.0277,  0.0101, -0.0198,  0.1562,
         0.1435, -0.0277,  0.8606,  0.3214, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022])
arg max of tensor([ 0.0274, -0.0546,  0.1734,  

Tokens embeddings:
tensor([ 0.0274, -0.0546,  0.1734,  0.0843, -0.0276,  0.0948, -0.0229,  0.1686,
         0.1542, -0.0025,  0.1294,  0.2470,  0.1652, -0.0151,  0.6627,  0.3158,
        -0.0185,  0.0723,  0.0049,  0.0588,  0.2977, -0.0155,  0.0732,  0.6025,
         0.0779,  0.2562,  0.4237,  0.0149, -0.0277,  0.0101, -0.0198,  0.1562,
         0.1435, -0.0277,  0.8606,  0.3214, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022])
arg max of tensor([ 0.0274, -0.0546,  0.1734,  

Tokens embeddings:
tensor([ 0.0274, -0.0546,  0.1734,  0.0843, -0.0276,  0.0948, -0.0229,  0.1686,
         0.1542, -0.0025,  0.1294,  0.2470,  0.1652, -0.0151,  0.6627,  0.3158,
        -0.0185,  0.0723,  0.0049,  0.0588,  0.2977, -0.0155,  0.0732,  0.6025,
         0.0779,  0.2562,  0.4237,  0.0149, -0.0277,  0.0101, -0.0198,  0.1562,
         0.1435, -0.0277,  0.8606,  0.3214, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022])
arg max of tensor([ 0.0274, -0.0546,  0.1734,  

Tokens embeddings:
tensor([ 0.0274, -0.0546,  0.1734,  0.0843, -0.0276,  0.0948, -0.0229,  0.1686,
         0.1542, -0.0025,  0.1294,  0.2470,  0.1652, -0.0151,  0.6627,  0.3158,
        -0.0185,  0.0723,  0.0049,  0.0588,  0.2977, -0.0155,  0.0732,  0.6025,
         0.0779,  0.2562,  0.4237,  0.0149, -0.0277,  0.0101, -0.0198,  0.1562,
         0.1435, -0.0277,  0.8606,  0.3214, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022, -0.0022,
        -0.0022, -0.0022, -0.0022, -0.0022, -0.0022])
arg max of tensor([ 0.0274, -0.0546,  0.1734,  

In [32]:
# Pooling First token [CLS] for each sentence - argmax

# Select some sentences randomly
sent_index = [21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51,52,53,54,55,56,57,58,59,60,61,62,63,64]

for s in sent_index:
  print("*" * 100)
  # Get the sentence's words
  tokens = test_inputs[s]
  # For each layer...
  for l in range(12):
    print("\nLayer", l+1)
    attention = np.squeeze(test_attentions[s][l].detach().cpu().numpy(), axis=0)
    # and for each head
    #for h, head in enumerate(attention):
    #print("Head", h+1)
    # Get the attention for the cls token
    encoded_tokens = bert_tokenizer(tokens, padding=True, truncation=True, max_length=128, return_tensors='pt')
    encoded_tokens = encoded_tokens.to(device)
    with torch.no_grad():
      model_output1 = model_e(**encoded_tokens)
      tokens_embeddings = model_output1[0][:,0]
      tokens_embeddings = tokens_embeddings.cpu()
      cls_attentions = head[0]
      display(HTML(colorize(tokens, cls_attentions)))
      print("Tokens embeddings:")
      print(tokens_embeddings)
      arg = argmax(tokens_embeddings)
      print('arg max of %s: %d' % (tokens_embeddings, arg))

Output hidden; open in https://colab.research.google.com to view.

#### Visualize attentions for specific types of grammatical errors

In [33]:
# Lack of Subject-Verb Agreement
sentence1 = "I wrestled with this decision for years ."
# Pronoun Disagreement
sentence2 = "lined books are more enduring ."

In [34]:
# Encode the first sentence
encoded_sentence1 = bert_tokenizer.batch_encode_plus([sentence1], padding=True)

# Give as input to the model and get the outputs
inputs = torch.tensor(encoded_sentence1["input_ids"]).to(device)
att = torch.tensor(encoded_sentence1["attention_mask"]).to(device)
outputs = model_e(inputs, attention_mask=att)

In [35]:
# Get the predictions
output_probs = softmax(outputs.logits.detach().cpu().numpy(), axis=1)
predictions = (np.argmax(output_probs, axis=1))
print(sentence1, ":", predictions[0])

I wrestled with this decision for years . : 1


In [36]:
# Visualize the attention heatmaps for the CLS token
tokens = bert_tokenizer.convert_ids_to_tokens(inputs.detach().cpu().numpy()[0])
for l in range(12):
  print("\nLayer", l+1)
  attention = np.squeeze(outputs.attentions[l].detach().cpu().numpy(), axis=0)
  cls_attentions = []
  for h, head in enumerate(attention):
    print("Head", h+1)
    # Get the attention for the cls token
    cls_attentions = head[0]
    display(HTML(colorize(tokens, cls_attentions)))


Layer 1
Head 1


Head 2


Head 3


Head 4


Head 5


Head 6


Head 7


Head 8


Head 9


Head 10


Head 11


Head 12



Layer 2
Head 1


Head 2


Head 3


Head 4


Head 5


Head 6


Head 7


Head 8


Head 9


Head 10


Head 11


Head 12



Layer 3
Head 1


Head 2


Head 3


Head 4


Head 5


Head 6


Head 7


Head 8


Head 9


Head 10


Head 11


Head 12



Layer 4
Head 1


Head 2


Head 3


Head 4


Head 5


Head 6


Head 7


Head 8


Head 9


Head 10


Head 11


Head 12



Layer 5
Head 1


Head 2


Head 3


Head 4


Head 5


Head 6


Head 7


Head 8


Head 9


Head 10


Head 11


Head 12



Layer 6
Head 1


Head 2


Head 3


Head 4


Head 5


Head 6


Head 7


Head 8


Head 9


Head 10


Head 11


Head 12



Layer 7
Head 1


Head 2


Head 3


Head 4


Head 5


Head 6


Head 7


Head 8


Head 9


Head 10


Head 11


Head 12



Layer 8
Head 1


Head 2


Head 3


Head 4


Head 5


Head 6


Head 7


Head 8


Head 9


Head 10


Head 11


Head 12



Layer 9
Head 1


Head 2


Head 3


Head 4


Head 5


Head 6


Head 7


Head 8


Head 9


Head 10


Head 11


Head 12



Layer 10
Head 1


Head 2


Head 3


Head 4


Head 5


Head 6


Head 7


Head 8


Head 9


Head 10


Head 11


Head 12



Layer 11
Head 1


Head 2


Head 3


Head 4


Head 5


Head 6


Head 7


Head 8


Head 9


Head 10


Head 11


Head 12



Layer 12
Head 1


Head 2


Head 3


Head 4


Head 5


Head 6


Head 7


Head 8


Head 9


Head 10


Head 11


Head 12


In [37]:
# Encode the second sentence
encoded_sentence2 = bert_tokenizer.batch_encode_plus([sentence2], padding=True)

# Give as input to the model and get the outputs
inputs = torch.tensor(encoded_sentence2["input_ids"]).to(device)
att = torch.tensor(encoded_sentence2["attention_mask"]).to(device)
outputs = model_e(inputs, attention_mask=att)

# Get the predictions
output_probs = softmax(outputs.logits.detach().cpu().numpy(), axis=1)
predictions = (np.argmax(output_probs, axis=1))
print(sentence2, ":", predictions[0])

lined books are more enduring . : 0


In [38]:
# Visualize the attention heatmaps for the CLS token
tokens = bert_tokenizer.convert_ids_to_tokens(inputs.detach().cpu().numpy()[0])
for l in range(12):
  print("\nLayer", l+1)
  attention = np.squeeze(outputs.attentions[l].detach().cpu().numpy(), axis=0)
  cls_attentions = []
  for h, head in enumerate(attention):
    print("Head", h+1)
    # Get the attention for the cls token
    cls_attentions = head[0]
    display(HTML(colorize(tokens, cls_attentions)))


Layer 1
Head 1


Head 2


Head 3


Head 4


Head 5


Head 6


Head 7


Head 8


Head 9


Head 10


Head 11


Head 12



Layer 2
Head 1


Head 2


Head 3


Head 4


Head 5


Head 6


Head 7


Head 8


Head 9


Head 10


Head 11


Head 12



Layer 3
Head 1


Head 2


Head 3


Head 4


Head 5


Head 6


Head 7


Head 8


Head 9


Head 10


Head 11


Head 12



Layer 4
Head 1


Head 2


Head 3


Head 4


Head 5


Head 6


Head 7


Head 8


Head 9


Head 10


Head 11


Head 12



Layer 5
Head 1


Head 2


Head 3


Head 4


Head 5


Head 6


Head 7


Head 8


Head 9


Head 10


Head 11


Head 12



Layer 6
Head 1


Head 2


Head 3


Head 4


Head 5


Head 6


Head 7


Head 8


Head 9


Head 10


Head 11


Head 12



Layer 7
Head 1


Head 2


Head 3


Head 4


Head 5


Head 6


Head 7


Head 8


Head 9


Head 10


Head 11


Head 12



Layer 8
Head 1


Head 2


Head 3


Head 4


Head 5


Head 6


Head 7


Head 8


Head 9


Head 10


Head 11


Head 12



Layer 9
Head 1


Head 2


Head 3


Head 4


Head 5


Head 6


Head 7


Head 8


Head 9


Head 10


Head 11


Head 12



Layer 10
Head 1


Head 2


Head 3


Head 4


Head 5


Head 6


Head 7


Head 8


Head 9


Head 10


Head 11


Head 12



Layer 11
Head 1


Head 2


Head 3


Head 4


Head 5


Head 6


Head 7


Head 8


Head 9


Head 10


Head 11


Head 12



Layer 12
Head 1


Head 2


Head 3


Head 4


Head 5


Head 6


Head 7


Head 8


Head 9


Head 10


Head 11


Head 12
