In [1]:
from typing import Tuple, List
from functools import partial

In [2]:
import warnings
warnings.filterwarnings('ignore')

In [3]:
import os
import pandas as pd
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader, RandomSampler
from torch.nn.utils.rnn import pad_sequence
from transformers import BertTokenizer, BertModel, AdamW, get_linear_schedule_with_warmup, BertPreTrainedModel
from sklearn.model_selection import train_test_split
from sklearn.metrics import roc_auc_score
from tqdm import tqdm


In [4]:
path = "../data"
bert_model_name = 'bert-base-uncased'
if torch.cuda.is_available():
    device = torch.device('cuda')
else:
    device = torch.device('cpu')
tokenizer = BertTokenizer.from_pretrained(bert_model_name, do_lower_case=True)
assert tokenizer.pad_token_id == 0, "Padding value used in masks is set to zero, please change it everywhere"
train_df = pd.read_csv(os.path.join(path, 'train.csv'))
# training on a part of data for speed
# train_df = train_df.sample(frac=0.33)
train_df, val_df = train_test_split(train_df, test_size=0.05)

In [5]:
torch.cuda.device_count()

1

In [6]:
class ToxicDataset(Dataset):

    def __init__(self, tokenizer: BertTokenizer, dataframe: pd.DataFrame,lazy: bool = False):
        self.tokenizer = tokenizer
        self.pad_idx = tokenizer.pad_token_id
        self.lazy = lazy
        if not self.lazy:
            self.X = []
            self.Y = []
            for i, (row) in tqdm(dataframe.iterrows()):
                x, y = self.row_to_tensor(self.tokenizer, row)
                self.X.append(x)
                self.Y.append(y)
        else:
            self.df = dataframe

    @staticmethod
    def row_to_tensor(tokenizer: BertTokenizer, row: pd.Series) -> Tuple[torch.LongTensor, torch.LongTensor]:
        tokens = tokenizer.encode(row["comment_text"], add_special_tokens=True,max_length=128)
        if len(tokens) > 128:
            tokens = tokens[:127] + [tokens[-1]]
        x = torch.LongTensor(tokens)
        y = torch.FloatTensor(row[["toxic", "severe_toxic", "obscene", "threat", "insult", "identity_hate"]])
        return x, y


    def __len__(self):
        if self.lazy:
            return len(self.df)
        else:
            return len(self.X)

    def __getitem__(self, index: int) -> Tuple[torch.LongTensor, torch.LongTensor]:
        if not self.lazy:
            return self.X[index], self.Y[index]
        else:
            return self.row_to_tensor(self.tokenizer, self.df.iloc[index])


def collate_fn(batch: List[Tuple[torch.LongTensor, torch.LongTensor]], device: torch.device) \
        -> Tuple[torch.LongTensor, torch.LongTensor]:
    x, y = list(zip(*batch))
    x = pad_sequence(x, batch_first=True, padding_value=0)
    y = torch.stack(y)
    return x.to(device), y.to(device)


In [7]:
train_dataset = ToxicDataset(tokenizer, train_df, lazy=True)
dev_dataset = ToxicDataset(tokenizer, val_df, lazy=True)
collate_fn = partial(collate_fn, device=device)
BATCH_SIZE = 32
train_sampler = RandomSampler(train_dataset)
dev_sampler = RandomSampler(dev_dataset)
train_iterator = DataLoader(train_dataset, batch_size=BATCH_SIZE, sampler=train_sampler, collate_fn=collate_fn)
dev_iterator = DataLoader(dev_dataset, batch_size=BATCH_SIZE, sampler=dev_sampler, collate_fn=collate_fn)


In [8]:
class Focal_Loss(nn.Module):
    def __init__(self, alpha=1, gamma=2):
        super(Focal_Loss, self).__init__()
        self.alpha = alpha
        self.gamma = gamma

    def forward(self, inputs, targets):
        criterion = nn.BCELoss()
        loss = criterion(inputs, targets,reduce=False)
        pt = torch.exp(-loss)
        F_loss = self.alpha * (1-pt)**self.gamma * loss
        return torch.mean(F_loss)

In [9]:
class BertClassifier(nn.Module):

    def __init__(self, bert: BertModel, num_classes: int):
        super().__init__()
        self.bert = bert
        self.classifier = nn.Linear(bert.config.hidden_size, num_classes)

    def forward(self, input_ids, attention_mask=None, token_type_ids=None, position_ids=None, head_mask=None,

            labels=None):
        outputs = self.bert(input_ids,
                               attention_mask=attention_mask,
                               token_type_ids=token_type_ids,
                               position_ids=position_ids,
                               head_mask=head_mask)
        cls_output = outputs[1] # batch, hidden
        cls_output = self.classifier(cls_output) # batch, 6
        cls_output = torch.sigmoid(cls_output)
        criterion = Focal_Loss()
        loss = 0
        if labels is not None:
            loss = criterion(cls_output, labels)
        return loss, cls_output

In [10]:
model = BertClassifier(BertModel.from_pretrained(bert_model_name), 6).to(device)

In [11]:
def train(model, iterator, optimizer, scheduler):
    model.train()
    total_loss = 0
    for x, y in tqdm(iterator):
        optimizer.zero_grad()
        mask = (x != 0).float()
        loss, outputs = model(x, attention_mask=mask, labels=y)
        total_loss += loss.item()
        loss.backward()
        optimizer.step()
        scheduler.step()
    print(f"Train loss {total_loss / len(iterator)}")

def evaluate(model, iterator):
    model.eval()
    pred = []
    true = []
    with torch.no_grad():
        total_loss = 0
        for x, y in tqdm(iterator):
            mask = (x != 0).float()
            loss, outputs = model(x, attention_mask=mask, labels=y)
            total_loss += loss
            true += y.cpu().numpy().tolist()
            pred += outputs.cpu().numpy().tolist()
    true = np.array(true)
    pred = np.array(pred)
    for i, name in enumerate(['toxic', 'severe_toxic', 'obscene', 'threat', 'insult', 'identity_hate']):
        print(f"{name} roc_auc {roc_auc_score(true[:, i], pred[:, i])}")
    print(f"Evaluate loss {total_loss / len(iterator)}")


In [12]:
no_decay = ['bias', 'LayerNorm.weight']
optimizer_grouped_parameters = [
{'params': [p for n, p in model.named_parameters() if not any(nd in n for nd in no_decay)], 'weight_decay': 0.01},
{'params': [p for n, p in model.named_parameters() if any(nd in n for nd in no_decay)], 'weight_decay': 0.0}
]
EPOCH_NUM = 1
# triangular learning rate, linearly grows untill half of first epoch, then linearly decays
warmup_steps = 10 ** 3
total_steps = len(train_iterator) * EPOCH_NUM - warmup_steps
optimizer = AdamW(optimizer_grouped_parameters, lr=2e-5, eps=1e-8)
scheduler = get_linear_schedule_with_warmup(optimizer, warmup_steps, total_steps)


In [13]:
for i in range(EPOCH_NUM):
    print('=' * 50, f"EPOCH {i}", '=' * 50)
    train(model, train_iterator, optimizer, scheduler)
    evaluate(model, dev_iterator)




  0%|          | 1/4738 [00:00<57:54,  1.36it/s]Token indices sequence length is longer than the specified maximum sequence length for this model (579 > 512). Running this sequence through the model will result in indexing errors
  0%|          | 2/4738 [00:01<37:52,  2.08it/s]Token indices sequence length is longer than the specified maximum sequence length for this model (569 > 512). Running this sequence through the model will result in indexing errors
  0%|          | 4/4738 [00:01<28:05,  2.81it/s]Token indices sequence length is longer than the specified maximum sequence length for this model (840 > 512). Running this sequence through the model will result in indexing errors
Token indices sequence length is longer than the specified maximum sequence length for this model (808 > 512). Running this sequence through the model will result in indexing errors
  0%|          | 5/4738 [00:01<27:02,  2.92it/s]Token indices sequence length is longer than the specified maximum sequence leng

Token indices sequence length is longer than the specified maximum sequence length for this model (593 > 512). Running this sequence through the model will result in indexing errors
  1%|          | 59/4738 [00:17<22:55,  3.40it/s]Token indices sequence length is longer than the specified maximum sequence length for this model (1030 > 512). Running this sequence through the model will result in indexing errors
  1%|▏         | 62/4738 [00:18<22:42,  3.43it/s]Token indices sequence length is longer than the specified maximum sequence length for this model (716 > 512). Running this sequence through the model will result in indexing errors
Token indices sequence length is longer than the specified maximum sequence length for this model (511 > 512). Running this sequence through the model will result in indexing errors
  1%|▏         | 66/4738 [00:19<22:29,  3.46it/s]Token indices sequence length is longer than the specified maximum sequence length for this model (521 > 512). Running this 

  3%|▎         | 124/4738 [00:36<22:18,  3.45it/s]Token indices sequence length is longer than the specified maximum sequence length for this model (524 > 512). Running this sequence through the model will result in indexing errors
  3%|▎         | 125/4738 [00:37<22:30,  3.42it/s]Token indices sequence length is longer than the specified maximum sequence length for this model (648 > 512). Running this sequence through the model will result in indexing errors
  3%|▎         | 128/4738 [00:38<21:58,  3.50it/s]Token indices sequence length is longer than the specified maximum sequence length for this model (1634 > 512). Running this sequence through the model will result in indexing errors
Token indices sequence length is longer than the specified maximum sequence length for this model (839 > 512). Running this sequence through the model will result in indexing errors
  3%|▎         | 130/4738 [00:38<22:17,  3.45it/s]Token indices sequence length is longer than the specified maximum sequ

  4%|▍         | 184/4738 [00:54<22:08,  3.43it/s]Token indices sequence length is longer than the specified maximum sequence length for this model (1089 > 512). Running this sequence through the model will result in indexing errors
  4%|▍         | 186/4738 [00:55<22:14,  3.41it/s]Token indices sequence length is longer than the specified maximum sequence length for this model (517 > 512). Running this sequence through the model will result in indexing errors
  4%|▍         | 188/4738 [00:55<22:02,  3.44it/s]Token indices sequence length is longer than the specified maximum sequence length for this model (1024 > 512). Running this sequence through the model will result in indexing errors
  4%|▍         | 190/4738 [00:56<22:14,  3.41it/s]Token indices sequence length is longer than the specified maximum sequence length for this model (1053 > 512). Running this sequence through the model will result in indexing errors
Token indices sequence length is longer than the specified maximum se

  5%|▍         | 231/4738 [01:08<21:44,  3.46it/s]Token indices sequence length is longer than the specified maximum sequence length for this model (554 > 512). Running this sequence through the model will result in indexing errors
  5%|▍         | 232/4738 [01:08<22:01,  3.41it/s]Token indices sequence length is longer than the specified maximum sequence length for this model (529 > 512). Running this sequence through the model will result in indexing errors
  5%|▍         | 233/4738 [01:09<22:10,  3.39it/s]Token indices sequence length is longer than the specified maximum sequence length for this model (589 > 512). Running this sequence through the model will result in indexing errors
  5%|▍         | 234/4738 [01:09<22:17,  3.37it/s]Token indices sequence length is longer than the specified maximum sequence length for this model (729 > 512). Running this sequence through the model will result in indexing errors
Token indices sequence length is longer than the specified maximum seque

  6%|▌         | 283/4738 [01:23<22:38,  3.28it/s]Token indices sequence length is longer than the specified maximum sequence length for this model (1192 > 512). Running this sequence through the model will result in indexing errors
  6%|▌         | 284/4738 [01:24<22:34,  3.29it/s]Token indices sequence length is longer than the specified maximum sequence length for this model (838 > 512). Running this sequence through the model will result in indexing errors
  6%|▌         | 285/4738 [01:24<22:30,  3.30it/s]Token indices sequence length is longer than the specified maximum sequence length for this model (911 > 512). Running this sequence through the model will result in indexing errors
  6%|▌         | 286/4738 [01:24<22:28,  3.30it/s]Token indices sequence length is longer than the specified maximum sequence length for this model (1241 > 512). Running this sequence through the model will result in indexing errors
Token indices sequence length is longer than the specified maximum seq

  7%|▋         | 334/4738 [01:38<21:36,  3.40it/s]Token indices sequence length is longer than the specified maximum sequence length for this model (900 > 512). Running this sequence through the model will result in indexing errors
  7%|▋         | 335/4738 [01:39<21:43,  3.38it/s]Token indices sequence length is longer than the specified maximum sequence length for this model (819 > 512). Running this sequence through the model will result in indexing errors
  7%|▋         | 336/4738 [01:39<22:04,  3.32it/s]Token indices sequence length is longer than the specified maximum sequence length for this model (749 > 512). Running this sequence through the model will result in indexing errors
  7%|▋         | 338/4738 [01:40<21:48,  3.36it/s]Token indices sequence length is longer than the specified maximum sequence length for this model (1003 > 512). Running this sequence through the model will result in indexing errors
  7%|▋         | 339/4738 [01:40<21:41,  3.38it/s]Token indices sequenc

  8%|▊         | 396/4738 [01:57<21:10,  3.42it/s]Token indices sequence length is longer than the specified maximum sequence length for this model (682 > 512). Running this sequence through the model will result in indexing errors
  8%|▊         | 399/4738 [01:58<20:58,  3.45it/s]Token indices sequence length is longer than the specified maximum sequence length for this model (555 > 512). Running this sequence through the model will result in indexing errors
  8%|▊         | 400/4738 [01:58<21:01,  3.44it/s]Token indices sequence length is longer than the specified maximum sequence length for this model (628 > 512). Running this sequence through the model will result in indexing errors
Token indices sequence length is longer than the specified maximum sequence length for this model (1668 > 512). Running this sequence through the model will result in indexing errors
Token indices sequence length is longer than the specified maximum sequence length for this model (571 > 512). Running th

  9%|▉         | 449/4738 [02:12<21:27,  3.33it/s]Token indices sequence length is longer than the specified maximum sequence length for this model (891 > 512). Running this sequence through the model will result in indexing errors
 10%|▉         | 451/4738 [02:13<21:10,  3.37it/s]Token indices sequence length is longer than the specified maximum sequence length for this model (703 > 512). Running this sequence through the model will result in indexing errors
 10%|▉         | 452/4738 [02:13<21:21,  3.34it/s]Token indices sequence length is longer than the specified maximum sequence length for this model (978 > 512). Running this sequence through the model will result in indexing errors
 10%|▉         | 454/4738 [02:14<21:14,  3.36it/s]Token indices sequence length is longer than the specified maximum sequence length for this model (903 > 512). Running this sequence through the model will result in indexing errors
Token indices sequence length is longer than the specified maximum seque

 11%|█         | 505/4738 [02:29<21:01,  3.35it/s]Token indices sequence length is longer than the specified maximum sequence length for this model (598 > 512). Running this sequence through the model will result in indexing errors
 11%|█         | 507/4738 [02:30<20:41,  3.41it/s]Token indices sequence length is longer than the specified maximum sequence length for this model (827 > 512). Running this sequence through the model will result in indexing errors
Token indices sequence length is longer than the specified maximum sequence length for this model (607 > 512). Running this sequence through the model will result in indexing errors
 11%|█         | 508/4738 [02:30<20:58,  3.36it/s]Token indices sequence length is longer than the specified maximum sequence length for this model (611 > 512). Running this sequence through the model will result in indexing errors
 11%|█         | 509/4738 [02:30<21:04,  3.34it/s]Token indices sequence length is longer than the specified maximum seque

 12%|█▏        | 546/4738 [02:41<21:14,  3.29it/s]Token indices sequence length is longer than the specified maximum sequence length for this model (650 > 512). Running this sequence through the model will result in indexing errors
 12%|█▏        | 550/4738 [02:42<20:19,  3.43it/s]Token indices sequence length is longer than the specified maximum sequence length for this model (1320 > 512). Running this sequence through the model will result in indexing errors
 12%|█▏        | 551/4738 [02:43<20:41,  3.37it/s]Token indices sequence length is longer than the specified maximum sequence length for this model (1109 > 512). Running this sequence through the model will result in indexing errors
 12%|█▏        | 552/4738 [02:43<20:41,  3.37it/s]Token indices sequence length is longer than the specified maximum sequence length for this model (618 > 512). Running this sequence through the model will result in indexing errors
 12%|█▏        | 553/4738 [02:43<20:49,  3.35it/s]Token indices sequen

 13%|█▎        | 606/4738 [02:59<20:17,  3.39it/s]Token indices sequence length is longer than the specified maximum sequence length for this model (817 > 512). Running this sequence through the model will result in indexing errors
 13%|█▎        | 608/4738 [02:59<20:14,  3.40it/s]Token indices sequence length is longer than the specified maximum sequence length for this model (1355 > 512). Running this sequence through the model will result in indexing errors
Token indices sequence length is longer than the specified maximum sequence length for this model (537 > 512). Running this sequence through the model will result in indexing errors
 13%|█▎        | 609/4738 [03:00<20:37,  3.34it/s]Token indices sequence length is longer than the specified maximum sequence length for this model (727 > 512). Running this sequence through the model will result in indexing errors
 13%|█▎        | 611/4738 [03:00<20:28,  3.36it/s]Token indices sequence length is longer than the specified maximum sequ

Token indices sequence length is longer than the specified maximum sequence length for this model (565 > 512). Running this sequence through the model will result in indexing errors
 14%|█▍        | 675/4738 [03:19<19:48,  3.42it/s]Token indices sequence length is longer than the specified maximum sequence length for this model (1620 > 512). Running this sequence through the model will result in indexing errors
 14%|█▍        | 676/4738 [03:19<19:59,  3.39it/s]Token indices sequence length is longer than the specified maximum sequence length for this model (750 > 512). Running this sequence through the model will result in indexing errors
 14%|█▍        | 678/4738 [03:20<19:55,  3.40it/s]Token indices sequence length is longer than the specified maximum sequence length for this model (554 > 512). Running this sequence through the model will result in indexing errors
 14%|█▍        | 681/4738 [03:21<19:41,  3.43it/s]Token indices sequence length is longer than the specified maximum sequ

 15%|█▌        | 726/4738 [03:34<19:50,  3.37it/s]Token indices sequence length is longer than the specified maximum sequence length for this model (572 > 512). Running this sequence through the model will result in indexing errors
 15%|█▌        | 728/4738 [03:35<19:27,  3.43it/s]Token indices sequence length is longer than the specified maximum sequence length for this model (840 > 512). Running this sequence through the model will result in indexing errors
 15%|█▌        | 729/4738 [03:35<19:31,  3.42it/s]Token indices sequence length is longer than the specified maximum sequence length for this model (986 > 512). Running this sequence through the model will result in indexing errors
 16%|█▌        | 735/4738 [03:37<19:40,  3.39it/s]Token indices sequence length is longer than the specified maximum sequence length for this model (574 > 512). Running this sequence through the model will result in indexing errors
Token indices sequence length is longer than the specified maximum seque

 17%|█▋        | 789/4738 [03:53<19:10,  3.43it/s]Token indices sequence length is longer than the specified maximum sequence length for this model (1111 > 512). Running this sequence through the model will result in indexing errors
Token indices sequence length is longer than the specified maximum sequence length for this model (545 > 512). Running this sequence through the model will result in indexing errors
 17%|█▋        | 790/4738 [03:53<19:33,  3.36it/s]Token indices sequence length is longer than the specified maximum sequence length for this model (952 > 512). Running this sequence through the model will result in indexing errors
Token indices sequence length is longer than the specified maximum sequence length for this model (535 > 512). Running this sequence through the model will result in indexing errors
 17%|█▋        | 791/4738 [03:53<19:37,  3.35it/s]Token indices sequence length is longer than the specified maximum sequence length for this model (992 > 512). Running th

 18%|█▊        | 836/4738 [04:07<19:38,  3.31it/s]Token indices sequence length is longer than the specified maximum sequence length for this model (886 > 512). Running this sequence through the model will result in indexing errors
 18%|█▊        | 837/4738 [04:07<19:36,  3.32it/s]Token indices sequence length is longer than the specified maximum sequence length for this model (517 > 512). Running this sequence through the model will result in indexing errors
Token indices sequence length is longer than the specified maximum sequence length for this model (530 > 512). Running this sequence through the model will result in indexing errors
Token indices sequence length is longer than the specified maximum sequence length for this model (628 > 512). Running this sequence through the model will result in indexing errors
 18%|█▊        | 838/4738 [04:07<19:41,  3.30it/s]Token indices sequence length is longer than the specified maximum sequence length for this model (1875 > 512). Running th

 19%|█▉        | 891/4738 [04:23<19:06,  3.36it/s]Token indices sequence length is longer than the specified maximum sequence length for this model (1670 > 512). Running this sequence through the model will result in indexing errors
 19%|█▉        | 893/4738 [04:24<18:58,  3.38it/s]Token indices sequence length is longer than the specified maximum sequence length for this model (793 > 512). Running this sequence through the model will result in indexing errors
Token indices sequence length is longer than the specified maximum sequence length for this model (1008 > 512). Running this sequence through the model will result in indexing errors
Token indices sequence length is longer than the specified maximum sequence length for this model (544 > 512). Running this sequence through the model will result in indexing errors
Token indices sequence length is longer than the specified maximum sequence length for this model (711 > 512). Running this sequence through the model will result in inde

 20%|██        | 949/4738 [04:40<18:46,  3.36it/s]Token indices sequence length is longer than the specified maximum sequence length for this model (650 > 512). Running this sequence through the model will result in indexing errors
 20%|██        | 950/4738 [04:40<18:37,  3.39it/s]Token indices sequence length is longer than the specified maximum sequence length for this model (749 > 512). Running this sequence through the model will result in indexing errors
Token indices sequence length is longer than the specified maximum sequence length for this model (534 > 512). Running this sequence through the model will result in indexing errors
 20%|██        | 951/4738 [04:41<18:41,  3.38it/s]Token indices sequence length is longer than the specified maximum sequence length for this model (1014 > 512). Running this sequence through the model will result in indexing errors
Token indices sequence length is longer than the specified maximum sequence length for this model (1072 > 512). Running t

 21%|██        | 993/4738 [04:53<18:26,  3.38it/s]Token indices sequence length is longer than the specified maximum sequence length for this model (866 > 512). Running this sequence through the model will result in indexing errors
 21%|██        | 995/4738 [04:54<18:08,  3.44it/s]Token indices sequence length is longer than the specified maximum sequence length for this model (516 > 512). Running this sequence through the model will result in indexing errors
 21%|██        | 996/4738 [04:54<18:03,  3.45it/s]Token indices sequence length is longer than the specified maximum sequence length for this model (958 > 512). Running this sequence through the model will result in indexing errors
 21%|██        | 1001/4738 [04:56<18:15,  3.41it/s]Token indices sequence length is longer than the specified maximum sequence length for this model (831 > 512). Running this sequence through the model will result in indexing errors
 21%|██        | 1004/4738 [04:56<18:08,  3.43it/s]Token indices sequen

 22%|██▏       | 1058/4738 [05:12<17:53,  3.43it/s]Token indices sequence length is longer than the specified maximum sequence length for this model (880 > 512). Running this sequence through the model will result in indexing errors
 22%|██▏       | 1060/4738 [05:13<17:56,  3.42it/s]Token indices sequence length is longer than the specified maximum sequence length for this model (1134 > 512). Running this sequence through the model will result in indexing errors
 22%|██▏       | 1061/4738 [05:13<18:17,  3.35it/s]Token indices sequence length is longer than the specified maximum sequence length for this model (604 > 512). Running this sequence through the model will result in indexing errors
 22%|██▏       | 1062/4738 [05:13<18:16,  3.35it/s]Token indices sequence length is longer than the specified maximum sequence length for this model (788 > 512). Running this sequence through the model will result in indexing errors
Token indices sequence length is longer than the specified maximum 

Token indices sequence length is longer than the specified maximum sequence length for this model (800 > 512). Running this sequence through the model will result in indexing errors
 23%|██▎       | 1113/4738 [05:29<17:46,  3.40it/s]Token indices sequence length is longer than the specified maximum sequence length for this model (545 > 512). Running this sequence through the model will result in indexing errors
Token indices sequence length is longer than the specified maximum sequence length for this model (820 > 512). Running this sequence through the model will result in indexing errors
 24%|██▎       | 1114/4738 [05:29<18:00,  3.36it/s]Token indices sequence length is longer than the specified maximum sequence length for this model (521 > 512). Running this sequence through the model will result in indexing errors
 24%|██▎       | 1115/4738 [05:29<17:49,  3.39it/s]Token indices sequence length is longer than the specified maximum sequence length for this model (514 > 512). Running 

 25%|██▍       | 1164/4738 [05:44<17:32,  3.40it/s]Token indices sequence length is longer than the specified maximum sequence length for this model (1025 > 512). Running this sequence through the model will result in indexing errors
 25%|██▍       | 1167/4738 [05:45<17:28,  3.41it/s]Token indices sequence length is longer than the specified maximum sequence length for this model (915 > 512). Running this sequence through the model will result in indexing errors
 25%|██▍       | 1171/4738 [05:46<17:09,  3.47it/s]Token indices sequence length is longer than the specified maximum sequence length for this model (1313 > 512). Running this sequence through the model will result in indexing errors
 25%|██▍       | 1172/4738 [05:46<17:19,  3.43it/s]Token indices sequence length is longer than the specified maximum sequence length for this model (535 > 512). Running this sequence through the model will result in indexing errors
 25%|██▍       | 1173/4738 [05:46<17:20,  3.43it/s]Token indices s

 26%|██▌       | 1223/4738 [06:01<17:14,  3.40it/s]Token indices sequence length is longer than the specified maximum sequence length for this model (717 > 512). Running this sequence through the model will result in indexing errors
 26%|██▌       | 1224/4738 [06:01<17:28,  3.35it/s]Token indices sequence length is longer than the specified maximum sequence length for this model (629 > 512). Running this sequence through the model will result in indexing errors
Token indices sequence length is longer than the specified maximum sequence length for this model (1013 > 512). Running this sequence through the model will result in indexing errors
 26%|██▌       | 1226/4738 [06:02<17:21,  3.37it/s]Token indices sequence length is longer than the specified maximum sequence length for this model (1040 > 512). Running this sequence through the model will result in indexing errors
 26%|██▌       | 1227/4738 [06:02<17:24,  3.36it/s]Token indices sequence length is longer than the specified maximum

 27%|██▋       | 1271/4738 [06:15<17:07,  3.37it/s]Token indices sequence length is longer than the specified maximum sequence length for this model (722 > 512). Running this sequence through the model will result in indexing errors
Token indices sequence length is longer than the specified maximum sequence length for this model (618 > 512). Running this sequence through the model will result in indexing errors
 27%|██▋       | 1272/4738 [06:15<17:11,  3.36it/s]Token indices sequence length is longer than the specified maximum sequence length for this model (709 > 512). Running this sequence through the model will result in indexing errors
 27%|██▋       | 1273/4738 [06:16<17:08,  3.37it/s]Token indices sequence length is longer than the specified maximum sequence length for this model (895 > 512). Running this sequence through the model will result in indexing errors
 27%|██▋       | 1277/4738 [06:17<16:57,  3.40it/s]Token indices sequence length is longer than the specified maximum s

 28%|██▊       | 1323/4738 [06:31<16:31,  3.45it/s]Token indices sequence length is longer than the specified maximum sequence length for this model (990 > 512). Running this sequence through the model will result in indexing errors
Token indices sequence length is longer than the specified maximum sequence length for this model (955 > 512). Running this sequence through the model will result in indexing errors
 28%|██▊       | 1324/4738 [06:31<16:56,  3.36it/s]Token indices sequence length is longer than the specified maximum sequence length for this model (1092 > 512). Running this sequence through the model will result in indexing errors
Token indices sequence length is longer than the specified maximum sequence length for this model (755 > 512). Running this sequence through the model will result in indexing errors
 28%|██▊       | 1326/4738 [06:31<17:05,  3.33it/s]Token indices sequence length is longer than the specified maximum sequence length for this model (947 > 512). Running

 29%|██▉       | 1383/4738 [06:48<16:24,  3.41it/s]Token indices sequence length is longer than the specified maximum sequence length for this model (656 > 512). Running this sequence through the model will result in indexing errors
Token indices sequence length is longer than the specified maximum sequence length for this model (639 > 512). Running this sequence through the model will result in indexing errors
 29%|██▉       | 1388/4738 [06:50<16:21,  3.41it/s]Token indices sequence length is longer than the specified maximum sequence length for this model (1167 > 512). Running this sequence through the model will result in indexing errors
Token indices sequence length is longer than the specified maximum sequence length for this model (842 > 512). Running this sequence through the model will result in indexing errors
Token indices sequence length is longer than the specified maximum sequence length for this model (880 > 512). Running this sequence through the model will result in ind

 31%|███       | 1447/4738 [07:07<16:22,  3.35it/s]Token indices sequence length is longer than the specified maximum sequence length for this model (629 > 512). Running this sequence through the model will result in indexing errors
Token indices sequence length is longer than the specified maximum sequence length for this model (1110 > 512). Running this sequence through the model will result in indexing errors
 31%|███       | 1448/4738 [07:07<16:26,  3.34it/s]Token indices sequence length is longer than the specified maximum sequence length for this model (785 > 512). Running this sequence through the model will result in indexing errors
 31%|███       | 1450/4738 [07:08<16:07,  3.40it/s]Token indices sequence length is longer than the specified maximum sequence length for this model (851 > 512). Running this sequence through the model will result in indexing errors
 31%|███       | 1451/4738 [07:08<16:12,  3.38it/s]Token indices sequence length is longer than the specified maximum 

Token indices sequence length is longer than the specified maximum sequence length for this model (671 > 512). Running this sequence through the model will result in indexing errors
 32%|███▏      | 1500/4738 [07:23<16:03,  3.36it/s]Token indices sequence length is longer than the specified maximum sequence length for this model (540 > 512). Running this sequence through the model will result in indexing errors
Token indices sequence length is longer than the specified maximum sequence length for this model (2090 > 512). Running this sequence through the model will result in indexing errors
 32%|███▏      | 1502/4738 [07:23<16:02,  3.36it/s]Token indices sequence length is longer than the specified maximum sequence length for this model (709 > 512). Running this sequence through the model will result in indexing errors
Token indices sequence length is longer than the specified maximum sequence length for this model (990 > 512). Running this sequence through the model will result in ind

Token indices sequence length is longer than the specified maximum sequence length for this model (714 > 512). Running this sequence through the model will result in indexing errors
 33%|███▎      | 1551/4738 [07:38<15:32,  3.42it/s]Token indices sequence length is longer than the specified maximum sequence length for this model (1082 > 512). Running this sequence through the model will result in indexing errors
 33%|███▎      | 1553/4738 [07:38<15:27,  3.43it/s]Token indices sequence length is longer than the specified maximum sequence length for this model (716 > 512). Running this sequence through the model will result in indexing errors
Token indices sequence length is longer than the specified maximum sequence length for this model (707 > 512). Running this sequence through the model will result in indexing errors
Token indices sequence length is longer than the specified maximum sequence length for this model (1078 > 512). Running this sequence through the model will result in in

 34%|███▍      | 1607/4738 [07:54<15:38,  3.34it/s]Token indices sequence length is longer than the specified maximum sequence length for this model (1664 > 512). Running this sequence through the model will result in indexing errors
 34%|███▍      | 1608/4738 [07:55<15:34,  3.35it/s]Token indices sequence length is longer than the specified maximum sequence length for this model (953 > 512). Running this sequence through the model will result in indexing errors
 34%|███▍      | 1610/4738 [07:55<15:28,  3.37it/s]Token indices sequence length is longer than the specified maximum sequence length for this model (670 > 512). Running this sequence through the model will result in indexing errors
 34%|███▍      | 1612/4738 [07:56<15:26,  3.37it/s]Token indices sequence length is longer than the specified maximum sequence length for this model (608 > 512). Running this sequence through the model will result in indexing errors
 34%|███▍      | 1613/4738 [07:56<15:24,  3.38it/s]Token indices se

 35%|███▌      | 1675/4738 [08:14<15:21,  3.32it/s]Token indices sequence length is longer than the specified maximum sequence length for this model (1058 > 512). Running this sequence through the model will result in indexing errors
Token indices sequence length is longer than the specified maximum sequence length for this model (576 > 512). Running this sequence through the model will result in indexing errors
 35%|███▌      | 1677/4738 [08:15<15:24,  3.31it/s]Token indices sequence length is longer than the specified maximum sequence length for this model (926 > 512). Running this sequence through the model will result in indexing errors
 35%|███▌      | 1679/4738 [08:16<15:10,  3.36it/s]Token indices sequence length is longer than the specified maximum sequence length for this model (816 > 512). Running this sequence through the model will result in indexing errors
 35%|███▌      | 1680/4738 [08:16<15:14,  3.34it/s]Token indices sequence length is longer than the specified maximum 

 37%|███▋      | 1743/4738 [08:34<15:08,  3.30it/s]Token indices sequence length is longer than the specified maximum sequence length for this model (1074 > 512). Running this sequence through the model will result in indexing errors
 37%|███▋      | 1744/4738 [08:35<15:10,  3.29it/s]Token indices sequence length is longer than the specified maximum sequence length for this model (830 > 512). Running this sequence through the model will result in indexing errors
Token indices sequence length is longer than the specified maximum sequence length for this model (1015 > 512). Running this sequence through the model will result in indexing errors
 37%|███▋      | 1747/4738 [08:36<14:56,  3.34it/s]Token indices sequence length is longer than the specified maximum sequence length for this model (576 > 512). Running this sequence through the model will result in indexing errors
Token indices sequence length is longer than the specified maximum sequence length for this model (1474 > 512). Runni

 38%|███▊      | 1807/4738 [08:53<14:23,  3.40it/s]Token indices sequence length is longer than the specified maximum sequence length for this model (982 > 512). Running this sequence through the model will result in indexing errors
Token indices sequence length is longer than the specified maximum sequence length for this model (936 > 512). Running this sequence through the model will result in indexing errors
 38%|███▊      | 1810/4738 [08:54<14:27,  3.37it/s]Token indices sequence length is longer than the specified maximum sequence length for this model (671 > 512). Running this sequence through the model will result in indexing errors
Token indices sequence length is longer than the specified maximum sequence length for this model (571 > 512). Running this sequence through the model will result in indexing errors
Token indices sequence length is longer than the specified maximum sequence length for this model (1807 > 512). Running this sequence through the model will result in ind

 39%|███▉      | 1843/4738 [09:04<14:18,  3.37it/s]Token indices sequence length is longer than the specified maximum sequence length for this model (560 > 512). Running this sequence through the model will result in indexing errors
 39%|███▉      | 1844/4738 [09:04<14:25,  3.34it/s]Token indices sequence length is longer than the specified maximum sequence length for this model (1759 > 512). Running this sequence through the model will result in indexing errors
 39%|███▉      | 1846/4738 [09:05<14:12,  3.39it/s]Token indices sequence length is longer than the specified maximum sequence length for this model (1037 > 512). Running this sequence through the model will result in indexing errors
Token indices sequence length is longer than the specified maximum sequence length for this model (857 > 512). Running this sequence through the model will result in indexing errors
 39%|███▉      | 1849/4738 [09:06<14:10,  3.40it/s]Token indices sequence length is longer than the specified maximum

Token indices sequence length is longer than the specified maximum sequence length for this model (564 > 512). Running this sequence through the model will result in indexing errors
Token indices sequence length is longer than the specified maximum sequence length for this model (666 > 512). Running this sequence through the model will result in indexing errors
 40%|████      | 1896/4738 [09:20<14:20,  3.30it/s]Token indices sequence length is longer than the specified maximum sequence length for this model (514 > 512). Running this sequence through the model will result in indexing errors
 40%|████      | 1897/4738 [09:20<14:12,  3.33it/s]Token indices sequence length is longer than the specified maximum sequence length for this model (1197 > 512). Running this sequence through the model will result in indexing errors
Token indices sequence length is longer than the specified maximum sequence length for this model (521 > 512). Running this sequence through the model will result in ind

 41%|████      | 1939/4738 [09:32<13:31,  3.45it/s]Token indices sequence length is longer than the specified maximum sequence length for this model (782 > 512). Running this sequence through the model will result in indexing errors
Token indices sequence length is longer than the specified maximum sequence length for this model (964 > 512). Running this sequence through the model will result in indexing errors
 41%|████      | 1940/4738 [09:33<13:54,  3.35it/s]Token indices sequence length is longer than the specified maximum sequence length for this model (748 > 512). Running this sequence through the model will result in indexing errors
Token indices sequence length is longer than the specified maximum sequence length for this model (706 > 512). Running this sequence through the model will result in indexing errors
 41%|████      | 1941/4738 [09:33<13:58,  3.34it/s]Token indices sequence length is longer than the specified maximum sequence length for this model (945 > 512). Running 

 42%|████▏     | 1989/4738 [09:47<13:49,  3.32it/s]Token indices sequence length is longer than the specified maximum sequence length for this model (568 > 512). Running this sequence through the model will result in indexing errors
 42%|████▏     | 1995/4738 [09:49<13:14,  3.45it/s]Token indices sequence length is longer than the specified maximum sequence length for this model (682 > 512). Running this sequence through the model will result in indexing errors
Token indices sequence length is longer than the specified maximum sequence length for this model (556 > 512). Running this sequence through the model will result in indexing errors
 42%|████▏     | 1996/4738 [09:49<13:27,  3.40it/s]Token indices sequence length is longer than the specified maximum sequence length for this model (957 > 512). Running this sequence through the model will result in indexing errors
 42%|████▏     | 2000/4738 [09:50<13:22,  3.41it/s]Token indices sequence length is longer than the specified maximum s

 43%|████▎     | 2046/4738 [10:04<13:21,  3.36it/s]Token indices sequence length is longer than the specified maximum sequence length for this model (548 > 512). Running this sequence through the model will result in indexing errors
 43%|████▎     | 2047/4738 [10:04<13:12,  3.40it/s]Token indices sequence length is longer than the specified maximum sequence length for this model (569 > 512). Running this sequence through the model will result in indexing errors
Token indices sequence length is longer than the specified maximum sequence length for this model (1060 > 512). Running this sequence through the model will result in indexing errors
 43%|████▎     | 2048/4738 [10:05<13:29,  3.32it/s]Token indices sequence length is longer than the specified maximum sequence length for this model (1162 > 512). Running this sequence through the model will result in indexing errors
 43%|████▎     | 2049/4738 [10:05<13:31,  3.31it/s]Token indices sequence length is longer than the specified maximum

Token indices sequence length is longer than the specified maximum sequence length for this model (988 > 512). Running this sequence through the model will result in indexing errors
 44%|████▍     | 2091/4738 [10:17<12:59,  3.40it/s]Token indices sequence length is longer than the specified maximum sequence length for this model (959 > 512). Running this sequence through the model will result in indexing errors
Token indices sequence length is longer than the specified maximum sequence length for this model (1108 > 512). Running this sequence through the model will result in indexing errors
 44%|████▍     | 2092/4738 [10:18<13:09,  3.35it/s]Token indices sequence length is longer than the specified maximum sequence length for this model (515 > 512). Running this sequence through the model will result in indexing errors
 44%|████▍     | 2094/4738 [10:18<12:52,  3.42it/s]Token indices sequence length is longer than the specified maximum sequence length for this model (1010 > 512). Runnin

 45%|████▌     | 2145/4738 [10:33<12:59,  3.32it/s]Token indices sequence length is longer than the specified maximum sequence length for this model (561 > 512). Running this sequence through the model will result in indexing errors
 45%|████▌     | 2146/4738 [10:34<12:55,  3.34it/s]Token indices sequence length is longer than the specified maximum sequence length for this model (1067 > 512). Running this sequence through the model will result in indexing errors
Token indices sequence length is longer than the specified maximum sequence length for this model (647 > 512). Running this sequence through the model will result in indexing errors
 45%|████▌     | 2149/4738 [10:35<12:46,  3.38it/s]Token indices sequence length is longer than the specified maximum sequence length for this model (681 > 512). Running this sequence through the model will result in indexing errors
 45%|████▌     | 2151/4738 [10:35<12:31,  3.44it/s]Token indices sequence length is longer than the specified maximum 

Token indices sequence length is longer than the specified maximum sequence length for this model (673 > 512). Running this sequence through the model will result in indexing errors
Token indices sequence length is longer than the specified maximum sequence length for this model (775 > 512). Running this sequence through the model will result in indexing errors
 46%|████▋     | 2201/4738 [10:50<12:52,  3.29it/s]Token indices sequence length is longer than the specified maximum sequence length for this model (2692 > 512). Running this sequence through the model will result in indexing errors
 46%|████▋     | 2203/4738 [10:51<12:40,  3.33it/s]Token indices sequence length is longer than the specified maximum sequence length for this model (1037 > 512). Running this sequence through the model will result in indexing errors
 47%|████▋     | 2206/4738 [10:51<12:30,  3.37it/s]Token indices sequence length is longer than the specified maximum sequence length for this model (706 > 512). Runnin

 47%|████▋     | 2246/4738 [11:03<12:17,  3.38it/s]Token indices sequence length is longer than the specified maximum sequence length for this model (961 > 512). Running this sequence through the model will result in indexing errors
Token indices sequence length is longer than the specified maximum sequence length for this model (515 > 512). Running this sequence through the model will result in indexing errors
 47%|████▋     | 2247/4738 [11:04<12:22,  3.36it/s]Token indices sequence length is longer than the specified maximum sequence length for this model (928 > 512). Running this sequence through the model will result in indexing errors
 48%|████▊     | 2253/4738 [11:05<11:59,  3.45it/s]Token indices sequence length is longer than the specified maximum sequence length for this model (778 > 512). Running this sequence through the model will result in indexing errors
 48%|████▊     | 2255/4738 [11:06<11:59,  3.45it/s]Token indices sequence length is longer than the specified maximum s

 49%|████▊     | 2303/4738 [11:20<12:43,  3.19it/s]Token indices sequence length is longer than the specified maximum sequence length for this model (977 > 512). Running this sequence through the model will result in indexing errors
 49%|████▊     | 2304/4738 [11:21<12:37,  3.21it/s]Token indices sequence length is longer than the specified maximum sequence length for this model (668 > 512). Running this sequence through the model will result in indexing errors
 49%|████▉     | 2310/4738 [11:22<11:49,  3.42it/s]Token indices sequence length is longer than the specified maximum sequence length for this model (789 > 512). Running this sequence through the model will result in indexing errors
Token indices sequence length is longer than the specified maximum sequence length for this model (955 > 512). Running this sequence through the model will result in indexing errors
 49%|████▉     | 2315/4738 [11:24<11:41,  3.46it/s]Token indices sequence length is longer than the specified maximum s

Token indices sequence length is longer than the specified maximum sequence length for this model (796 > 512). Running this sequence through the model will result in indexing errors
Token indices sequence length is longer than the specified maximum sequence length for this model (518 > 512). Running this sequence through the model will result in indexing errors
 50%|████▉     | 2364/4738 [11:38<11:43,  3.37it/s]Token indices sequence length is longer than the specified maximum sequence length for this model (561 > 512). Running this sequence through the model will result in indexing errors
Token indices sequence length is longer than the specified maximum sequence length for this model (1171 > 512). Running this sequence through the model will result in indexing errors
 50%|█████     | 2371/4738 [11:40<11:29,  3.43it/s]Token indices sequence length is longer than the specified maximum sequence length for this model (761 > 512). Running this sequence through the model will result in ind

 51%|█████     | 2414/4738 [11:53<11:12,  3.46it/s]Token indices sequence length is longer than the specified maximum sequence length for this model (756 > 512). Running this sequence through the model will result in indexing errors
 51%|█████     | 2421/4738 [11:55<11:15,  3.43it/s]Token indices sequence length is longer than the specified maximum sequence length for this model (842 > 512). Running this sequence through the model will result in indexing errors
 51%|█████     | 2424/4738 [11:56<11:18,  3.41it/s]Token indices sequence length is longer than the specified maximum sequence length for this model (1031 > 512). Running this sequence through the model will result in indexing errors
 51%|█████     | 2427/4738 [11:57<11:16,  3.41it/s]Token indices sequence length is longer than the specified maximum sequence length for this model (1001 > 512). Running this sequence through the model will result in indexing errors
 51%|█████▏    | 2429/4738 [11:58<11:24,  3.37it/s]Token indices s

 52%|█████▏    | 2477/4738 [12:12<11:25,  3.30it/s]Token indices sequence length is longer than the specified maximum sequence length for this model (1041 > 512). Running this sequence through the model will result in indexing errors
 52%|█████▏    | 2480/4738 [12:13<11:04,  3.40it/s]Token indices sequence length is longer than the specified maximum sequence length for this model (1875 > 512). Running this sequence through the model will result in indexing errors
Token indices sequence length is longer than the specified maximum sequence length for this model (867 > 512). Running this sequence through the model will result in indexing errors
Token indices sequence length is longer than the specified maximum sequence length for this model (713 > 512). Running this sequence through the model will result in indexing errors
 52%|█████▏    | 2483/4738 [12:14<11:06,  3.38it/s]Token indices sequence length is longer than the specified maximum sequence length for this model (1173 > 512). Runni

 53%|█████▎    | 2534/4738 [12:29<11:06,  3.31it/s]Token indices sequence length is longer than the specified maximum sequence length for this model (673 > 512). Running this sequence through the model will result in indexing errors
 54%|█████▎    | 2537/4738 [12:30<10:46,  3.41it/s]Token indices sequence length is longer than the specified maximum sequence length for this model (870 > 512). Running this sequence through the model will result in indexing errors
 54%|█████▎    | 2539/4738 [12:30<10:45,  3.41it/s]Token indices sequence length is longer than the specified maximum sequence length for this model (856 > 512). Running this sequence through the model will result in indexing errors
 54%|█████▎    | 2543/4738 [12:31<10:36,  3.45it/s]Token indices sequence length is longer than the specified maximum sequence length for this model (545 > 512). Running this sequence through the model will result in indexing errors
 54%|█████▎    | 2545/4738 [12:32<10:32,  3.47it/s]Token indices seq

 55%|█████▍    | 2600/4738 [12:48<11:16,  3.16it/s]Token indices sequence length is longer than the specified maximum sequence length for this model (1264 > 512). Running this sequence through the model will result in indexing errors
 55%|█████▍    | 2601/4738 [12:48<11:05,  3.21it/s]Token indices sequence length is longer than the specified maximum sequence length for this model (681 > 512). Running this sequence through the model will result in indexing errors
 55%|█████▍    | 2603/4738 [12:49<10:49,  3.29it/s]Token indices sequence length is longer than the specified maximum sequence length for this model (923 > 512). Running this sequence through the model will result in indexing errors
 55%|█████▍    | 2604/4738 [12:49<10:47,  3.29it/s]Token indices sequence length is longer than the specified maximum sequence length for this model (662 > 512). Running this sequence through the model will result in indexing errors
Token indices sequence length is longer than the specified maximum 

Token indices sequence length is longer than the specified maximum sequence length for this model (770 > 512). Running this sequence through the model will result in indexing errors
 56%|█████▌    | 2647/4738 [13:02<10:31,  3.31it/s]Token indices sequence length is longer than the specified maximum sequence length for this model (883 > 512). Running this sequence through the model will result in indexing errors
 56%|█████▌    | 2649/4738 [13:03<10:16,  3.39it/s]Token indices sequence length is longer than the specified maximum sequence length for this model (960 > 512). Running this sequence through the model will result in indexing errors
Token indices sequence length is longer than the specified maximum sequence length for this model (676 > 512). Running this sequence through the model will result in indexing errors
 56%|█████▌    | 2652/4738 [13:04<10:08,  3.43it/s]Token indices sequence length is longer than the specified maximum sequence length for this model (544 > 512). Running 

 57%|█████▋    | 2698/4738 [13:17<09:58,  3.41it/s]Token indices sequence length is longer than the specified maximum sequence length for this model (544 > 512). Running this sequence through the model will result in indexing errors
Token indices sequence length is longer than the specified maximum sequence length for this model (591 > 512). Running this sequence through the model will result in indexing errors
Token indices sequence length is longer than the specified maximum sequence length for this model (602 > 512). Running this sequence through the model will result in indexing errors
 57%|█████▋    | 2699/4738 [13:18<10:08,  3.35it/s]Token indices sequence length is longer than the specified maximum sequence length for this model (809 > 512). Running this sequence through the model will result in indexing errors
 57%|█████▋    | 2700/4738 [13:18<10:10,  3.34it/s]Token indices sequence length is longer than the specified maximum sequence length for this model (724 > 512). Running 

 58%|█████▊    | 2751/4738 [13:33<09:41,  3.41it/s]Token indices sequence length is longer than the specified maximum sequence length for this model (1124 > 512). Running this sequence through the model will result in indexing errors
 58%|█████▊    | 2753/4738 [13:34<09:42,  3.41it/s]Token indices sequence length is longer than the specified maximum sequence length for this model (601 > 512). Running this sequence through the model will result in indexing errors
 58%|█████▊    | 2758/4738 [13:35<09:26,  3.50it/s]Token indices sequence length is longer than the specified maximum sequence length for this model (1875 > 512). Running this sequence through the model will result in indexing errors
 58%|█████▊    | 2759/4738 [13:35<09:36,  3.43it/s]Token indices sequence length is longer than the specified maximum sequence length for this model (937 > 512). Running this sequence through the model will result in indexing errors
 58%|█████▊    | 2760/4738 [13:36<09:44,  3.38it/s]Token indices s

Token indices sequence length is longer than the specified maximum sequence length for this model (554 > 512). Running this sequence through the model will result in indexing errors
 59%|█████▉    | 2808/4738 [13:50<09:30,  3.38it/s]Token indices sequence length is longer than the specified maximum sequence length for this model (827 > 512). Running this sequence through the model will result in indexing errors
 59%|█████▉    | 2809/4738 [13:50<09:29,  3.39it/s]Token indices sequence length is longer than the specified maximum sequence length for this model (1064 > 512). Running this sequence through the model will result in indexing errors
Token indices sequence length is longer than the specified maximum sequence length for this model (532 > 512). Running this sequence through the model will result in indexing errors
 59%|█████▉    | 2810/4738 [13:50<09:36,  3.34it/s]Token indices sequence length is longer than the specified maximum sequence length for this model (531 > 512). Running

 60%|██████    | 2866/4738 [14:07<09:15,  3.37it/s]Token indices sequence length is longer than the specified maximum sequence length for this model (873 > 512). Running this sequence through the model will result in indexing errors
 61%|██████    | 2867/4738 [14:07<09:25,  3.31it/s]Token indices sequence length is longer than the specified maximum sequence length for this model (858 > 512). Running this sequence through the model will result in indexing errors
 61%|██████    | 2868/4738 [14:07<09:23,  3.32it/s]Token indices sequence length is longer than the specified maximum sequence length for this model (530 > 512). Running this sequence through the model will result in indexing errors
 61%|██████    | 2870/4738 [14:08<09:07,  3.41it/s]Token indices sequence length is longer than the specified maximum sequence length for this model (1203 > 512). Running this sequence through the model will result in indexing errors
 61%|██████    | 2872/4738 [14:09<09:10,  3.39it/s]Token indices se

 62%|██████▏   | 2920/4738 [14:23<08:54,  3.40it/s]Token indices sequence length is longer than the specified maximum sequence length for this model (854 > 512). Running this sequence through the model will result in indexing errors
 62%|██████▏   | 2922/4738 [14:23<08:53,  3.40it/s]Token indices sequence length is longer than the specified maximum sequence length for this model (650 > 512). Running this sequence through the model will result in indexing errors
 62%|██████▏   | 2925/4738 [14:24<08:50,  3.42it/s]Token indices sequence length is longer than the specified maximum sequence length for this model (774 > 512). Running this sequence through the model will result in indexing errors
 62%|██████▏   | 2928/4738 [14:25<08:46,  3.44it/s]Token indices sequence length is longer than the specified maximum sequence length for this model (1407 > 512). Running this sequence through the model will result in indexing errors
 62%|██████▏   | 2930/4738 [14:26<08:49,  3.42it/s]Token indices se

 63%|██████▎   | 2986/4738 [14:42<08:36,  3.39it/s]Token indices sequence length is longer than the specified maximum sequence length for this model (981 > 512). Running this sequence through the model will result in indexing errors
 63%|██████▎   | 2988/4738 [14:43<08:32,  3.41it/s]Token indices sequence length is longer than the specified maximum sequence length for this model (798 > 512). Running this sequence through the model will result in indexing errors
 63%|██████▎   | 2990/4738 [14:43<08:31,  3.42it/s]Token indices sequence length is longer than the specified maximum sequence length for this model (642 > 512). Running this sequence through the model will result in indexing errors
Token indices sequence length is longer than the specified maximum sequence length for this model (900 > 512). Running this sequence through the model will result in indexing errors
 63%|██████▎   | 2991/4738 [14:44<08:41,  3.35it/s]Token indices sequence length is longer than the specified maximum s

 64%|██████▍   | 3038/4738 [14:58<08:19,  3.40it/s]Token indices sequence length is longer than the specified maximum sequence length for this model (916 > 512). Running this sequence through the model will result in indexing errors
 64%|██████▍   | 3039/4738 [14:58<08:21,  3.39it/s]Token indices sequence length is longer than the specified maximum sequence length for this model (742 > 512). Running this sequence through the model will result in indexing errors
 64%|██████▍   | 3040/4738 [14:58<08:25,  3.36it/s]Token indices sequence length is longer than the specified maximum sequence length for this model (679 > 512). Running this sequence through the model will result in indexing errors
 64%|██████▍   | 3041/4738 [14:59<08:23,  3.37it/s]Token indices sequence length is longer than the specified maximum sequence length for this model (627 > 512). Running this sequence through the model will result in indexing errors
 64%|██████▍   | 3042/4738 [14:59<08:19,  3.39it/s]Token indices seq

 65%|██████▌   | 3090/4738 [15:13<08:15,  3.33it/s]Token indices sequence length is longer than the specified maximum sequence length for this model (595 > 512). Running this sequence through the model will result in indexing errors
 65%|██████▌   | 3095/4738 [15:15<08:00,  3.42it/s]Token indices sequence length is longer than the specified maximum sequence length for this model (721 > 512). Running this sequence through the model will result in indexing errors
Token indices sequence length is longer than the specified maximum sequence length for this model (925 > 512). Running this sequence through the model will result in indexing errors
 65%|██████▌   | 3100/4738 [15:16<07:53,  3.46it/s]Token indices sequence length is longer than the specified maximum sequence length for this model (762 > 512). Running this sequence through the model will result in indexing errors
 65%|██████▌   | 3101/4738 [15:16<08:00,  3.41it/s]Token indices sequence length is longer than the specified maximum s

 67%|██████▋   | 3163/4738 [15:35<07:33,  3.47it/s]Token indices sequence length is longer than the specified maximum sequence length for this model (842 > 512). Running this sequence through the model will result in indexing errors
Token indices sequence length is longer than the specified maximum sequence length for this model (777 > 512). Running this sequence through the model will result in indexing errors
 67%|██████▋   | 3165/4738 [15:35<07:39,  3.42it/s]Token indices sequence length is longer than the specified maximum sequence length for this model (1029 > 512). Running this sequence through the model will result in indexing errors
 67%|██████▋   | 3168/4738 [15:36<07:41,  3.40it/s]Token indices sequence length is longer than the specified maximum sequence length for this model (804 > 512). Running this sequence through the model will result in indexing errors
Token indices sequence length is longer than the specified maximum sequence length for this model (555 > 512). Running

Token indices sequence length is longer than the specified maximum sequence length for this model (739 > 512). Running this sequence through the model will result in indexing errors
 68%|██████▊   | 3214/4738 [15:50<07:34,  3.36it/s]Token indices sequence length is longer than the specified maximum sequence length for this model (758 > 512). Running this sequence through the model will result in indexing errors
Token indices sequence length is longer than the specified maximum sequence length for this model (611 > 512). Running this sequence through the model will result in indexing errors
 68%|██████▊   | 3215/4738 [15:50<07:36,  3.33it/s]Token indices sequence length is longer than the specified maximum sequence length for this model (535 > 512). Running this sequence through the model will result in indexing errors
 68%|██████▊   | 3216/4738 [15:50<07:31,  3.37it/s]Token indices sequence length is longer than the specified maximum sequence length for this model (1081 > 512). Running

 69%|██████▉   | 3263/4738 [16:04<07:10,  3.43it/s]Token indices sequence length is longer than the specified maximum sequence length for this model (627 > 512). Running this sequence through the model will result in indexing errors
 69%|██████▉   | 3264/4738 [16:05<07:11,  3.41it/s]Token indices sequence length is longer than the specified maximum sequence length for this model (605 > 512). Running this sequence through the model will result in indexing errors
Token indices sequence length is longer than the specified maximum sequence length for this model (997 > 512). Running this sequence through the model will result in indexing errors
 69%|██████▉   | 3265/4738 [16:05<07:20,  3.34it/s]Token indices sequence length is longer than the specified maximum sequence length for this model (930 > 512). Running this sequence through the model will result in indexing errors
 69%|██████▉   | 3268/4738 [16:06<07:10,  3.42it/s]Token indices sequence length is longer than the specified maximum s

 70%|██████▉   | 3307/4738 [16:17<07:06,  3.35it/s]Token indices sequence length is longer than the specified maximum sequence length for this model (584 > 512). Running this sequence through the model will result in indexing errors
 70%|██████▉   | 3309/4738 [16:18<07:01,  3.39it/s]Token indices sequence length is longer than the specified maximum sequence length for this model (1073 > 512). Running this sequence through the model will result in indexing errors
 70%|██████▉   | 3310/4738 [16:18<07:04,  3.36it/s]Token indices sequence length is longer than the specified maximum sequence length for this model (942 > 512). Running this sequence through the model will result in indexing errors
 70%|██████▉   | 3311/4738 [16:19<07:05,  3.36it/s]Token indices sequence length is longer than the specified maximum sequence length for this model (971 > 512). Running this sequence through the model will result in indexing errors
 70%|██████▉   | 3312/4738 [16:19<07:06,  3.34it/s]Token indices se

 71%|███████   | 3360/4738 [16:33<06:53,  3.33it/s]Token indices sequence length is longer than the specified maximum sequence length for this model (818 > 512). Running this sequence through the model will result in indexing errors
Token indices sequence length is longer than the specified maximum sequence length for this model (843 > 512). Running this sequence through the model will result in indexing errors
 71%|███████   | 3361/4738 [16:34<06:57,  3.30it/s]Token indices sequence length is longer than the specified maximum sequence length for this model (605 > 512). Running this sequence through the model will result in indexing errors
 71%|███████   | 3362/4738 [16:34<06:50,  3.35it/s]Token indices sequence length is longer than the specified maximum sequence length for this model (888 > 512). Running this sequence through the model will result in indexing errors
 71%|███████   | 3363/4738 [16:34<06:51,  3.34it/s]Token indices sequence length is longer than the specified maximum s

 72%|███████▏  | 3405/4738 [16:47<06:51,  3.24it/s]Token indices sequence length is longer than the specified maximum sequence length for this model (858 > 512). Running this sequence through the model will result in indexing errors
 72%|███████▏  | 3408/4738 [16:47<06:29,  3.42it/s]Token indices sequence length is longer than the specified maximum sequence length for this model (527 > 512). Running this sequence through the model will result in indexing errors
 72%|███████▏  | 3411/4738 [16:48<06:32,  3.38it/s]Token indices sequence length is longer than the specified maximum sequence length for this model (895 > 512). Running this sequence through the model will result in indexing errors
 72%|███████▏  | 3412/4738 [16:49<06:34,  3.36it/s]Token indices sequence length is longer than the specified maximum sequence length for this model (541 > 512). Running this sequence through the model will result in indexing errors
 72%|███████▏  | 3413/4738 [16:49<06:34,  3.36it/s]Token indices seq

 73%|███████▎  | 3464/4738 [17:04<06:15,  3.39it/s]Token indices sequence length is longer than the specified maximum sequence length for this model (772 > 512). Running this sequence through the model will result in indexing errors
 73%|███████▎  | 3465/4738 [17:04<06:14,  3.40it/s]Token indices sequence length is longer than the specified maximum sequence length for this model (569 > 512). Running this sequence through the model will result in indexing errors
 73%|███████▎  | 3466/4738 [17:05<06:13,  3.41it/s]Token indices sequence length is longer than the specified maximum sequence length for this model (976 > 512). Running this sequence through the model will result in indexing errors
Token indices sequence length is longer than the specified maximum sequence length for this model (1059 > 512). Running this sequence through the model will result in indexing errors
 73%|███████▎  | 3468/4738 [17:05<06:16,  3.37it/s]Token indices sequence length is longer than the specified maximum 

 74%|███████▍  | 3504/4738 [17:16<06:11,  3.32it/s]Token indices sequence length is longer than the specified maximum sequence length for this model (992 > 512). Running this sequence through the model will result in indexing errors
 74%|███████▍  | 3505/4738 [17:16<06:07,  3.35it/s]Token indices sequence length is longer than the specified maximum sequence length for this model (733 > 512). Running this sequence through the model will result in indexing errors
Token indices sequence length is longer than the specified maximum sequence length for this model (647 > 512). Running this sequence through the model will result in indexing errors
 74%|███████▍  | 3506/4738 [17:17<06:09,  3.33it/s]Token indices sequence length is longer than the specified maximum sequence length for this model (882 > 512). Running this sequence through the model will result in indexing errors
 74%|███████▍  | 3507/4738 [17:17<06:10,  3.32it/s]Token indices sequence length is longer than the specified maximum s

 75%|███████▍  | 3542/4738 [17:27<05:58,  3.33it/s]Token indices sequence length is longer than the specified maximum sequence length for this model (616 > 512). Running this sequence through the model will result in indexing errors
 75%|███████▍  | 3543/4738 [17:28<05:56,  3.35it/s]Token indices sequence length is longer than the specified maximum sequence length for this model (608 > 512). Running this sequence through the model will result in indexing errors
 75%|███████▍  | 3545/4738 [17:28<05:52,  3.38it/s]Token indices sequence length is longer than the specified maximum sequence length for this model (829 > 512). Running this sequence through the model will result in indexing errors
Token indices sequence length is longer than the specified maximum sequence length for this model (606 > 512). Running this sequence through the model will result in indexing errors
Token indices sequence length is longer than the specified maximum sequence length for this model (577 > 512). Running 

 76%|███████▌  | 3596/4738 [17:43<05:35,  3.40it/s]Token indices sequence length is longer than the specified maximum sequence length for this model (778 > 512). Running this sequence through the model will result in indexing errors
 76%|███████▌  | 3597/4738 [17:43<05:35,  3.40it/s]Token indices sequence length is longer than the specified maximum sequence length for this model (606 > 512). Running this sequence through the model will result in indexing errors
 76%|███████▌  | 3598/4738 [17:44<05:38,  3.36it/s]Token indices sequence length is longer than the specified maximum sequence length for this model (618 > 512). Running this sequence through the model will result in indexing errors
 76%|███████▌  | 3600/4738 [17:44<05:37,  3.37it/s]Token indices sequence length is longer than the specified maximum sequence length for this model (869 > 512). Running this sequence through the model will result in indexing errors
 76%|███████▌  | 3601/4738 [17:45<05:39,  3.35it/s]Token indices seq

Token indices sequence length is longer than the specified maximum sequence length for this model (551 > 512). Running this sequence through the model will result in indexing errors
 77%|███████▋  | 3636/4738 [17:55<05:34,  3.29it/s]Token indices sequence length is longer than the specified maximum sequence length for this model (1069 > 512). Running this sequence through the model will result in indexing errors
 77%|███████▋  | 3637/4738 [17:55<05:34,  3.29it/s]Token indices sequence length is longer than the specified maximum sequence length for this model (571 > 512). Running this sequence through the model will result in indexing errors
Token indices sequence length is longer than the specified maximum sequence length for this model (623 > 512). Running this sequence through the model will result in indexing errors
 77%|███████▋  | 3638/4738 [17:56<05:33,  3.30it/s]Token indices sequence length is longer than the specified maximum sequence length for this model (649 > 512). Running

 78%|███████▊  | 3683/4738 [18:09<05:08,  3.42it/s]Token indices sequence length is longer than the specified maximum sequence length for this model (768 > 512). Running this sequence through the model will result in indexing errors
 78%|███████▊  | 3686/4738 [18:10<05:03,  3.47it/s]Token indices sequence length is longer than the specified maximum sequence length for this model (892 > 512). Running this sequence through the model will result in indexing errors
 78%|███████▊  | 3687/4738 [18:10<05:06,  3.43it/s]Token indices sequence length is longer than the specified maximum sequence length for this model (594 > 512). Running this sequence through the model will result in indexing errors
Token indices sequence length is longer than the specified maximum sequence length for this model (738 > 512). Running this sequence through the model will result in indexing errors
 78%|███████▊  | 3688/4738 [18:11<05:09,  3.39it/s]Token indices sequence length is longer than the specified maximum s

 79%|███████▉  | 3738/4738 [18:25<05:04,  3.29it/s]Token indices sequence length is longer than the specified maximum sequence length for this model (973 > 512). Running this sequence through the model will result in indexing errors
 79%|███████▉  | 3739/4738 [18:26<05:02,  3.31it/s]Token indices sequence length is longer than the specified maximum sequence length for this model (751 > 512). Running this sequence through the model will result in indexing errors
 79%|███████▉  | 3743/4738 [18:27<04:51,  3.41it/s]Token indices sequence length is longer than the specified maximum sequence length for this model (1109 > 512). Running this sequence through the model will result in indexing errors
 79%|███████▉  | 3744/4738 [18:27<04:53,  3.38it/s]Token indices sequence length is longer than the specified maximum sequence length for this model (752 > 512). Running this sequence through the model will result in indexing errors
Token indices sequence length is longer than the specified maximum 

 80%|████████  | 3796/4738 [18:43<04:41,  3.35it/s]Token indices sequence length is longer than the specified maximum sequence length for this model (708 > 512). Running this sequence through the model will result in indexing errors
 80%|████████  | 3799/4738 [18:43<04:38,  3.37it/s]Token indices sequence length is longer than the specified maximum sequence length for this model (737 > 512). Running this sequence through the model will result in indexing errors
 80%|████████  | 3800/4738 [18:44<04:38,  3.37it/s]Token indices sequence length is longer than the specified maximum sequence length for this model (962 > 512). Running this sequence through the model will result in indexing errors
Token indices sequence length is longer than the specified maximum sequence length for this model (1166 > 512). Running this sequence through the model will result in indexing errors
Token indices sequence length is longer than the specified maximum sequence length for this model (548 > 512). Running

 81%|████████  | 3846/4738 [18:57<04:30,  3.30it/s]Token indices sequence length is longer than the specified maximum sequence length for this model (534 > 512). Running this sequence through the model will result in indexing errors
Token indices sequence length is longer than the specified maximum sequence length for this model (836 > 512). Running this sequence through the model will result in indexing errors
Token indices sequence length is longer than the specified maximum sequence length for this model (759 > 512). Running this sequence through the model will result in indexing errors
 81%|████████  | 3849/4738 [18:58<04:23,  3.38it/s]Token indices sequence length is longer than the specified maximum sequence length for this model (700 > 512). Running this sequence through the model will result in indexing errors
 81%|████████▏ | 3850/4738 [18:59<04:23,  3.37it/s]Token indices sequence length is longer than the specified maximum sequence length for this model (527 > 512). Running 

 82%|████████▏ | 3886/4738 [19:09<04:16,  3.32it/s]Token indices sequence length is longer than the specified maximum sequence length for this model (668 > 512). Running this sequence through the model will result in indexing errors
 82%|████████▏ | 3887/4738 [19:10<04:15,  3.33it/s]Token indices sequence length is longer than the specified maximum sequence length for this model (521 > 512). Running this sequence through the model will result in indexing errors
 82%|████████▏ | 3888/4738 [19:10<04:13,  3.35it/s]Token indices sequence length is longer than the specified maximum sequence length for this model (899 > 512). Running this sequence through the model will result in indexing errors
 82%|████████▏ | 3889/4738 [19:10<04:17,  3.30it/s]Token indices sequence length is longer than the specified maximum sequence length for this model (618 > 512). Running this sequence through the model will result in indexing errors
 82%|████████▏ | 3891/4738 [19:11<04:11,  3.37it/s]Token indices seq

 83%|████████▎ | 3937/4738 [19:24<03:58,  3.36it/s]Token indices sequence length is longer than the specified maximum sequence length for this model (917 > 512). Running this sequence through the model will result in indexing errors
Token indices sequence length is longer than the specified maximum sequence length for this model (803 > 512). Running this sequence through the model will result in indexing errors
 83%|████████▎ | 3939/4738 [19:25<03:57,  3.37it/s]Token indices sequence length is longer than the specified maximum sequence length for this model (865 > 512). Running this sequence through the model will result in indexing errors
 83%|████████▎ | 3940/4738 [19:25<03:55,  3.38it/s]Token indices sequence length is longer than the specified maximum sequence length for this model (540 > 512). Running this sequence through the model will result in indexing errors
 83%|████████▎ | 3941/4738 [19:26<03:58,  3.34it/s]Token indices sequence length is longer than the specified maximum s

 84%|████████▍ | 3984/4738 [19:38<03:39,  3.44it/s]Token indices sequence length is longer than the specified maximum sequence length for this model (867 > 512). Running this sequence through the model will result in indexing errors
 84%|████████▍ | 3986/4738 [19:39<03:38,  3.44it/s]Token indices sequence length is longer than the specified maximum sequence length for this model (688 > 512). Running this sequence through the model will result in indexing errors
 84%|████████▍ | 3987/4738 [19:39<03:40,  3.40it/s]Token indices sequence length is longer than the specified maximum sequence length for this model (594 > 512). Running this sequence through the model will result in indexing errors
Token indices sequence length is longer than the specified maximum sequence length for this model (531 > 512). Running this sequence through the model will result in indexing errors
 84%|████████▍ | 3989/4738 [19:40<03:38,  3.42it/s]Token indices sequence length is longer than the specified maximum s

 85%|████████▌ | 4029/4738 [19:52<03:36,  3.27it/s]Token indices sequence length is longer than the specified maximum sequence length for this model (518 > 512). Running this sequence through the model will result in indexing errors
Token indices sequence length is longer than the specified maximum sequence length for this model (712 > 512). Running this sequence through the model will result in indexing errors
Token indices sequence length is longer than the specified maximum sequence length for this model (963 > 512). Running this sequence through the model will result in indexing errors
 85%|████████▌ | 4030/4738 [19:52<03:39,  3.23it/s]Token indices sequence length is longer than the specified maximum sequence length for this model (1562 > 512). Running this sequence through the model will result in indexing errors
Token indices sequence length is longer than the specified maximum sequence length for this model (791 > 512). Running this sequence through the model will result in ind

Token indices sequence length is longer than the specified maximum sequence length for this model (930 > 512). Running this sequence through the model will result in indexing errors
 86%|████████▌ | 4083/4738 [20:08<03:15,  3.35it/s]Token indices sequence length is longer than the specified maximum sequence length for this model (822 > 512). Running this sequence through the model will result in indexing errors
 86%|████████▌ | 4085/4738 [20:08<03:11,  3.42it/s]Token indices sequence length is longer than the specified maximum sequence length for this model (570 > 512). Running this sequence through the model will result in indexing errors
Token indices sequence length is longer than the specified maximum sequence length for this model (1015 > 512). Running this sequence through the model will result in indexing errors
 86%|████████▋ | 4087/4738 [20:09<03:12,  3.38it/s]Token indices sequence length is longer than the specified maximum sequence length for this model (869 > 512). Running

 87%|████████▋ | 4124/4738 [20:20<03:00,  3.39it/s]Token indices sequence length is longer than the specified maximum sequence length for this model (529 > 512). Running this sequence through the model will result in indexing errors
Token indices sequence length is longer than the specified maximum sequence length for this model (687 > 512). Running this sequence through the model will result in indexing errors
 87%|████████▋ | 4125/4738 [20:20<03:02,  3.36it/s]Token indices sequence length is longer than the specified maximum sequence length for this model (1057 > 512). Running this sequence through the model will result in indexing errors
Token indices sequence length is longer than the specified maximum sequence length for this model (665 > 512). Running this sequence through the model will result in indexing errors
 87%|████████▋ | 4126/4738 [20:21<03:04,  3.32it/s]Token indices sequence length is longer than the specified maximum sequence length for this model (554 > 512). Running

 88%|████████▊ | 4181/4738 [20:37<02:45,  3.37it/s]Token indices sequence length is longer than the specified maximum sequence length for this model (626 > 512). Running this sequence through the model will result in indexing errors
 88%|████████▊ | 4182/4738 [20:37<02:45,  3.36it/s]Token indices sequence length is longer than the specified maximum sequence length for this model (820 > 512). Running this sequence through the model will result in indexing errors
Token indices sequence length is longer than the specified maximum sequence length for this model (526 > 512). Running this sequence through the model will result in indexing errors
 88%|████████▊ | 4183/4738 [20:37<02:45,  3.36it/s]Token indices sequence length is longer than the specified maximum sequence length for this model (1524 > 512). Running this sequence through the model will result in indexing errors
 88%|████████▊ | 4184/4738 [20:38<02:45,  3.35it/s]Token indices sequence length is longer than the specified maximum 

 89%|████████▉ | 4232/4738 [20:52<02:26,  3.46it/s]Token indices sequence length is longer than the specified maximum sequence length for this model (742 > 512). Running this sequence through the model will result in indexing errors
 89%|████████▉ | 4233/4738 [20:52<02:27,  3.41it/s]Token indices sequence length is longer than the specified maximum sequence length for this model (513 > 512). Running this sequence through the model will result in indexing errors
Token indices sequence length is longer than the specified maximum sequence length for this model (715 > 512). Running this sequence through the model will result in indexing errors
 89%|████████▉ | 4234/4738 [20:52<02:29,  3.37it/s]Token indices sequence length is longer than the specified maximum sequence length for this model (1011 > 512). Running this sequence through the model will result in indexing errors
 89%|████████▉ | 4238/4738 [20:54<02:25,  3.44it/s]Token indices sequence length is longer than the specified maximum 

 91%|█████████ | 4294/4738 [21:10<02:11,  3.39it/s]Token indices sequence length is longer than the specified maximum sequence length for this model (517 > 512). Running this sequence through the model will result in indexing errors
 91%|█████████ | 4296/4738 [21:11<02:10,  3.40it/s]Token indices sequence length is longer than the specified maximum sequence length for this model (923 > 512). Running this sequence through the model will result in indexing errors
 91%|█████████ | 4297/4738 [21:11<02:10,  3.38it/s]Token indices sequence length is longer than the specified maximum sequence length for this model (835 > 512). Running this sequence through the model will result in indexing errors
Token indices sequence length is longer than the specified maximum sequence length for this model (1171 > 512). Running this sequence through the model will result in indexing errors
 91%|█████████ | 4298/4738 [21:11<02:12,  3.31it/s]Token indices sequence length is longer than the specified maximum 

 92%|█████████▏| 4343/4738 [21:25<01:56,  3.38it/s]Token indices sequence length is longer than the specified maximum sequence length for this model (986 > 512). Running this sequence through the model will result in indexing errors
 92%|█████████▏| 4344/4738 [21:25<01:57,  3.36it/s]Token indices sequence length is longer than the specified maximum sequence length for this model (1065 > 512). Running this sequence through the model will result in indexing errors
Token indices sequence length is longer than the specified maximum sequence length for this model (926 > 512). Running this sequence through the model will result in indexing errors
 92%|█████████▏| 4346/4738 [21:25<01:56,  3.36it/s]Token indices sequence length is longer than the specified maximum sequence length for this model (1000 > 512). Running this sequence through the model will result in indexing errors
Token indices sequence length is longer than the specified maximum sequence length for this model (705 > 512). Runnin

 92%|█████████▏| 4374/4738 [21:34<01:52,  3.24it/s]Token indices sequence length is longer than the specified maximum sequence length for this model (563 > 512). Running this sequence through the model will result in indexing errors
 92%|█████████▏| 4376/4738 [21:35<01:48,  3.34it/s]Token indices sequence length is longer than the specified maximum sequence length for this model (1743 > 512). Running this sequence through the model will result in indexing errors
Token indices sequence length is longer than the specified maximum sequence length for this model (993 > 512). Running this sequence through the model will result in indexing errors
 92%|█████████▏| 4377/4738 [21:35<01:49,  3.29it/s]Token indices sequence length is longer than the specified maximum sequence length for this model (775 > 512). Running this sequence through the model will result in indexing errors
 92%|█████████▏| 4381/4738 [21:36<01:44,  3.41it/s]Token indices sequence length is longer than the specified maximum 

 94%|█████████▎| 4432/4738 [21:51<01:31,  3.34it/s]Token indices sequence length is longer than the specified maximum sequence length for this model (573 > 512). Running this sequence through the model will result in indexing errors
 94%|█████████▎| 4433/4738 [21:51<01:31,  3.35it/s]Token indices sequence length is longer than the specified maximum sequence length for this model (836 > 512). Running this sequence through the model will result in indexing errors
 94%|█████████▎| 4435/4738 [21:52<01:30,  3.36it/s]Token indices sequence length is longer than the specified maximum sequence length for this model (533 > 512). Running this sequence through the model will result in indexing errors
Token indices sequence length is longer than the specified maximum sequence length for this model (558 > 512). Running this sequence through the model will result in indexing errors
 94%|█████████▎| 4436/4738 [21:52<01:30,  3.34it/s]Token indices sequence length is longer than the specified maximum s

 95%|█████████▍| 4482/4738 [22:06<01:15,  3.38it/s]Token indices sequence length is longer than the specified maximum sequence length for this model (676 > 512). Running this sequence through the model will result in indexing errors
 95%|█████████▍| 4484/4738 [22:06<01:14,  3.43it/s]Token indices sequence length is longer than the specified maximum sequence length for this model (630 > 512). Running this sequence through the model will result in indexing errors
Token indices sequence length is longer than the specified maximum sequence length for this model (577 > 512). Running this sequence through the model will result in indexing errors
Token indices sequence length is longer than the specified maximum sequence length for this model (1078 > 512). Running this sequence through the model will result in indexing errors
 95%|█████████▍| 4485/4738 [22:07<01:16,  3.31it/s]Token indices sequence length is longer than the specified maximum sequence length for this model (657 > 512). Running

 96%|█████████▌| 4532/4738 [22:21<01:01,  3.33it/s]Token indices sequence length is longer than the specified maximum sequence length for this model (533 > 512). Running this sequence through the model will result in indexing errors
 96%|█████████▌| 4533/4738 [22:21<01:00,  3.36it/s]Token indices sequence length is longer than the specified maximum sequence length for this model (1362 > 512). Running this sequence through the model will result in indexing errors
Token indices sequence length is longer than the specified maximum sequence length for this model (666 > 512). Running this sequence through the model will result in indexing errors
 96%|█████████▌| 4534/4738 [22:21<01:01,  3.32it/s]Token indices sequence length is longer than the specified maximum sequence length for this model (514 > 512). Running this sequence through the model will result in indexing errors
 96%|█████████▌| 4536/4738 [22:22<00:59,  3.41it/s]Token indices sequence length is longer than the specified maximum 

 97%|█████████▋| 4588/4738 [22:37<00:44,  3.38it/s]Token indices sequence length is longer than the specified maximum sequence length for this model (688 > 512). Running this sequence through the model will result in indexing errors
 97%|█████████▋| 4589/4738 [22:38<00:43,  3.40it/s]Token indices sequence length is longer than the specified maximum sequence length for this model (1043 > 512). Running this sequence through the model will result in indexing errors
 97%|█████████▋| 4590/4738 [22:38<00:43,  3.40it/s]Token indices sequence length is longer than the specified maximum sequence length for this model (741 > 512). Running this sequence through the model will result in indexing errors
Token indices sequence length is longer than the specified maximum sequence length for this model (629 > 512). Running this sequence through the model will result in indexing errors
 97%|█████████▋| 4591/4738 [22:38<00:43,  3.34it/s]Token indices sequence length is longer than the specified maximum 

 98%|█████████▊| 4643/4738 [22:53<00:27,  3.42it/s]Token indices sequence length is longer than the specified maximum sequence length for this model (534 > 512). Running this sequence through the model will result in indexing errors
 98%|█████████▊| 4644/4738 [22:54<00:27,  3.41it/s]Token indices sequence length is longer than the specified maximum sequence length for this model (705 > 512). Running this sequence through the model will result in indexing errors
 98%|█████████▊| 4645/4738 [22:54<00:27,  3.42it/s]Token indices sequence length is longer than the specified maximum sequence length for this model (754 > 512). Running this sequence through the model will result in indexing errors
 98%|█████████▊| 4646/4738 [22:54<00:27,  3.40it/s]Token indices sequence length is longer than the specified maximum sequence length for this model (587 > 512). Running this sequence through the model will result in indexing errors
 98%|█████████▊| 4648/4738 [22:55<00:26,  3.42it/s]Token indices seq

 99%|█████████▉| 4695/4738 [23:09<00:12,  3.40it/s]Token indices sequence length is longer than the specified maximum sequence length for this model (731 > 512). Running this sequence through the model will result in indexing errors
 99%|█████████▉| 4696/4738 [23:09<00:12,  3.38it/s]Token indices sequence length is longer than the specified maximum sequence length for this model (576 > 512). Running this sequence through the model will result in indexing errors
 99%|█████████▉| 4699/4738 [23:10<00:11,  3.45it/s]Token indices sequence length is longer than the specified maximum sequence length for this model (896 > 512). Running this sequence through the model will result in indexing errors
 99%|█████████▉| 4700/4738 [23:10<00:11,  3.42it/s]Token indices sequence length is longer than the specified maximum sequence length for this model (772 > 512). Running this sequence through the model will result in indexing errors
 99%|█████████▉| 4702/4738 [23:11<00:10,  3.47it/s]Token indices seq

Train loss 0.006991197652168955


  0%|          | 0/250 [00:00<?, ?it/s]Token indices sequence length is longer than the specified maximum sequence length for this model (781 > 512). Running this sequence through the model will result in indexing errors
Token indices sequence length is longer than the specified maximum sequence length for this model (985 > 512). Running this sequence through the model will result in indexing errors
  0%|          | 1/250 [00:00<00:37,  6.72it/s]Token indices sequence length is longer than the specified maximum sequence length for this model (538 > 512). Running this sequence through the model will result in indexing errors
  1%|          | 2/250 [00:00<00:35,  6.97it/s]Token indices sequence length is longer than the specified maximum sequence length for this model (884 > 512). Running this sequence through the model will result in indexing errors
  1%|          | 3/250 [00:00<00:34,  7.10it/s]Token indices sequence length is longer than the specified maximum sequence length for this 

 20%|██        | 51/250 [00:06<00:27,  7.32it/s]Token indices sequence length is longer than the specified maximum sequence length for this model (681 > 512). Running this sequence through the model will result in indexing errors
 21%|██        | 52/250 [00:06<00:27,  7.11it/s]Token indices sequence length is longer than the specified maximum sequence length for this model (570 > 512). Running this sequence through the model will result in indexing errors
 21%|██        | 53/250 [00:07<00:27,  7.24it/s]Token indices sequence length is longer than the specified maximum sequence length for this model (732 > 512). Running this sequence through the model will result in indexing errors
 22%|██▏       | 54/250 [00:07<00:26,  7.38it/s]Token indices sequence length is longer than the specified maximum sequence length for this model (747 > 512). Running this sequence through the model will result in indexing errors
Token indices sequence length is longer than the specified maximum sequence leng

 44%|████▍     | 111/250 [00:14<00:18,  7.57it/s]Token indices sequence length is longer than the specified maximum sequence length for this model (999 > 512). Running this sequence through the model will result in indexing errors
 45%|████▍     | 112/250 [00:14<00:19,  7.19it/s]Token indices sequence length is longer than the specified maximum sequence length for this model (1145 > 512). Running this sequence through the model will result in indexing errors
 46%|████▌     | 115/250 [00:15<00:18,  7.42it/s]Token indices sequence length is longer than the specified maximum sequence length for this model (851 > 512). Running this sequence through the model will result in indexing errors
 48%|████▊     | 120/250 [00:15<00:17,  7.62it/s]Token indices sequence length is longer than the specified maximum sequence length for this model (568 > 512). Running this sequence through the model will result in indexing errors
Token indices sequence length is longer than the specified maximum sequence

 68%|██████▊   | 171/250 [00:22<00:10,  7.50it/s]Token indices sequence length is longer than the specified maximum sequence length for this model (1031 > 512). Running this sequence through the model will result in indexing errors
 69%|██████▉   | 172/250 [00:22<00:10,  7.35it/s]Token indices sequence length is longer than the specified maximum sequence length for this model (667 > 512). Running this sequence through the model will result in indexing errors
Token indices sequence length is longer than the specified maximum sequence length for this model (819 > 512). Running this sequence through the model will result in indexing errors
 69%|██████▉   | 173/250 [00:23<00:10,  7.26it/s]Token indices sequence length is longer than the specified maximum sequence length for this model (760 > 512). Running this sequence through the model will result in indexing errors
 70%|██████▉   | 174/250 [00:23<00:10,  7.16it/s]Token indices sequence length is longer than the specified maximum sequence

Token indices sequence length is longer than the specified maximum sequence length for this model (781 > 512). Running this sequence through the model will result in indexing errors
 92%|█████████▏| 229/250 [00:30<00:02,  7.73it/s]Token indices sequence length is longer than the specified maximum sequence length for this model (718 > 512). Running this sequence through the model will result in indexing errors
Token indices sequence length is longer than the specified maximum sequence length for this model (756 > 512). Running this sequence through the model will result in indexing errors
Token indices sequence length is longer than the specified maximum sequence length for this model (2500 > 512). Running this sequence through the model will result in indexing errors
 92%|█████████▏| 230/250 [00:30<00:02,  7.26it/s]Token indices sequence length is longer than the specified maximum sequence length for this model (625 > 512). Running this sequence through the model will result in indexin

toxic roc_auc 0.983952827058185
severe_toxic roc_auc 0.9764822702044926
obscene roc_auc 0.9904147435502271
threat roc_auc 0.9054563741513705
insult roc_auc 0.9859203413679348
identity_hate roc_auc 0.9661202000142073
Evaluate loss 0.0002524217707104981


In [14]:
model.eval()
test_df = pd.read_csv(os.path.join(path, 'test.csv'))
submission = pd.read_csv(os.path.join(path, 'sample_submission.csv'))
columns = ['toxic', 'severe_toxic', 'obscene', 'threat', 'insult', 'identity_hate']
for i in tqdm(range(len(test_df) // BATCH_SIZE + 1)):
    batch_df = test_df.iloc[i * BATCH_SIZE: (i + 1) * BATCH_SIZE]
    assert (batch_df["id"] == submission["id"][i * BATCH_SIZE: (i + 1) * BATCH_SIZE]).all(), f"Id mismatch"
    texts = []
    for text in batch_df["comment_text"].tolist():
        text = tokenizer.encode(text, add_special_tokens=True, max_length=128)
        if len(text) > 120:
            text = text[:119] + [tokenizer.sep_token_id]
        texts.append(torch.LongTensor(text))
    x = pad_sequence(texts, batch_first=True, padding_value=tokenizer.pad_token_id).to(device)
    mask = (x != tokenizer.pad_token_id).float().to(device)
    with torch.no_grad():
        _, outputs = model(x, attention_mask=mask)
    outputs = outputs.cpu().numpy()
    submission.iloc[i * BATCH_SIZE: (i + 1) * BATCH_SIZE][columns] = outputs

  0%|          | 1/4787 [00:00<08:31,  9.36it/s]Token indices sequence length is longer than the specified maximum sequence length for this model (1006 > 512). Running this sequence through the model will result in indexing errors
  0%|          | 2/4787 [00:00<09:18,  8.56it/s]Token indices sequence length is longer than the specified maximum sequence length for this model (608 > 512). Running this sequence through the model will result in indexing errors
  0%|          | 4/4787 [00:00<09:31,  8.36it/s]Token indices sequence length is longer than the specified maximum sequence length for this model (798 > 512). Running this sequence through the model will result in indexing errors
  0%|          | 5/4787 [00:00<09:32,  8.35it/s]Token indices sequence length is longer than the specified maximum sequence length for this model (1103 > 512). Running this sequence through the model will result in indexing errors
Token indices sequence length is longer than the specified maximum sequence le

  1%|          | 43/4787 [00:05<09:25,  8.39it/s]Token indices sequence length is longer than the specified maximum sequence length for this model (833 > 512). Running this sequence through the model will result in indexing errors
Token indices sequence length is longer than the specified maximum sequence length for this model (1213 > 512). Running this sequence through the model will result in indexing errors
Token indices sequence length is longer than the specified maximum sequence length for this model (695 > 512). Running this sequence through the model will result in indexing errors
  1%|          | 45/4787 [00:05<09:21,  8.44it/s]Token indices sequence length is longer than the specified maximum sequence length for this model (575 > 512). Running this sequence through the model will result in indexing errors
  1%|          | 47/4787 [00:05<08:57,  8.82it/s]Token indices sequence length is longer than the specified maximum sequence length for this model (1159 > 512). Running this

  2%|▏         | 97/4787 [00:11<08:31,  9.17it/s]Token indices sequence length is longer than the specified maximum sequence length for this model (1250 > 512). Running this sequence through the model will result in indexing errors
  2%|▏         | 99/4787 [00:11<08:46,  8.91it/s]Token indices sequence length is longer than the specified maximum sequence length for this model (566 > 512). Running this sequence through the model will result in indexing errors
Token indices sequence length is longer than the specified maximum sequence length for this model (560 > 512). Running this sequence through the model will result in indexing errors
Token indices sequence length is longer than the specified maximum sequence length for this model (1116 > 512). Running this sequence through the model will result in indexing errors
  2%|▏         | 101/4787 [00:11<09:02,  8.64it/s]Token indices sequence length is longer than the specified maximum sequence length for this model (1096 > 512). Running th

  3%|▎         | 141/4787 [00:16<09:01,  8.59it/s]Token indices sequence length is longer than the specified maximum sequence length for this model (731 > 512). Running this sequence through the model will result in indexing errors
  3%|▎         | 142/4787 [00:16<08:50,  8.76it/s]Token indices sequence length is longer than the specified maximum sequence length for this model (512 > 512). Running this sequence through the model will result in indexing errors
  3%|▎         | 143/4787 [00:16<08:59,  8.61it/s]Token indices sequence length is longer than the specified maximum sequence length for this model (1667 > 512). Running this sequence through the model will result in indexing errors
  3%|▎         | 145/4787 [00:16<09:03,  8.54it/s]Token indices sequence length is longer than the specified maximum sequence length for this model (1125 > 512). Running this sequence through the model will result in indexing errors
  3%|▎         | 146/4787 [00:16<09:04,  8.53it/s]Token indices sequen

  4%|▍         | 191/4787 [00:22<08:27,  9.05it/s]Token indices sequence length is longer than the specified maximum sequence length for this model (740 > 512). Running this sequence through the model will result in indexing errors
  4%|▍         | 195/4787 [00:22<08:40,  8.82it/s]Token indices sequence length is longer than the specified maximum sequence length for this model (706 > 512). Running this sequence through the model will result in indexing errors
Token indices sequence length is longer than the specified maximum sequence length for this model (1080 > 512). Running this sequence through the model will result in indexing errors
  4%|▍         | 197/4787 [00:22<08:30,  9.00it/s]Token indices sequence length is longer than the specified maximum sequence length for this model (720 > 512). Running this sequence through the model will result in indexing errors
Token indices sequence length is longer than the specified maximum sequence length for this model (903 > 512). Running th

  5%|▌         | 255/4787 [00:29<08:56,  8.45it/s]Token indices sequence length is longer than the specified maximum sequence length for this model (759 > 512). Running this sequence through the model will result in indexing errors
  5%|▌         | 257/4787 [00:29<08:32,  8.84it/s]Token indices sequence length is longer than the specified maximum sequence length for this model (777 > 512). Running this sequence through the model will result in indexing errors
Token indices sequence length is longer than the specified maximum sequence length for this model (1079 > 512). Running this sequence through the model will result in indexing errors
Token indices sequence length is longer than the specified maximum sequence length for this model (693 > 512). Running this sequence through the model will result in indexing errors
  5%|▌         | 259/4787 [00:29<08:40,  8.69it/s]Token indices sequence length is longer than the specified maximum sequence length for this model (645 > 512). Running th

  6%|▌         | 298/4787 [00:34<08:46,  8.53it/s]Token indices sequence length is longer than the specified maximum sequence length for this model (537 > 512). Running this sequence through the model will result in indexing errors
  6%|▌         | 299/4787 [00:34<08:37,  8.67it/s]Token indices sequence length is longer than the specified maximum sequence length for this model (540 > 512). Running this sequence through the model will result in indexing errors
Token indices sequence length is longer than the specified maximum sequence length for this model (691 > 512). Running this sequence through the model will result in indexing errors
Token indices sequence length is longer than the specified maximum sequence length for this model (1010 > 512). Running this sequence through the model will result in indexing errors
  6%|▋         | 300/4787 [00:34<09:11,  8.13it/s]Token indices sequence length is longer than the specified maximum sequence length for this model (689 > 512). Running th

  7%|▋         | 354/4787 [00:40<08:31,  8.66it/s]Token indices sequence length is longer than the specified maximum sequence length for this model (1035 > 512). Running this sequence through the model will result in indexing errors
Token indices sequence length is longer than the specified maximum sequence length for this model (2073 > 512). Running this sequence through the model will result in indexing errors
  7%|▋         | 355/4787 [00:40<08:56,  8.27it/s]Token indices sequence length is longer than the specified maximum sequence length for this model (901 > 512). Running this sequence through the model will result in indexing errors
Token indices sequence length is longer than the specified maximum sequence length for this model (777 > 512). Running this sequence through the model will result in indexing errors
  7%|▋         | 357/4787 [00:41<08:43,  8.46it/s]Token indices sequence length is longer than the specified maximum sequence length for this model (1082 > 512). Running 

  8%|▊         | 394/4787 [00:45<09:18,  7.86it/s]Token indices sequence length is longer than the specified maximum sequence length for this model (655 > 512). Running this sequence through the model will result in indexing errors
  8%|▊         | 395/4787 [00:45<08:48,  8.31it/s]Token indices sequence length is longer than the specified maximum sequence length for this model (1002 > 512). Running this sequence through the model will result in indexing errors
Token indices sequence length is longer than the specified maximum sequence length for this model (854 > 512). Running this sequence through the model will result in indexing errors
Token indices sequence length is longer than the specified maximum sequence length for this model (956 > 512). Running this sequence through the model will result in indexing errors
  8%|▊         | 397/4787 [00:45<08:39,  8.45it/s]Token indices sequence length is longer than the specified maximum sequence length for this model (592 > 512). Running th

  9%|▉         | 442/4787 [00:51<08:18,  8.71it/s]Token indices sequence length is longer than the specified maximum sequence length for this model (1061 > 512). Running this sequence through the model will result in indexing errors
Token indices sequence length is longer than the specified maximum sequence length for this model (958 > 512). Running this sequence through the model will result in indexing errors
  9%|▉         | 446/4787 [00:51<08:09,  8.87it/s]Token indices sequence length is longer than the specified maximum sequence length for this model (936 > 512). Running this sequence through the model will result in indexing errors
  9%|▉         | 448/4787 [00:51<08:07,  8.91it/s]Token indices sequence length is longer than the specified maximum sequence length for this model (1119 > 512). Running this sequence through the model will result in indexing errors
  9%|▉         | 449/4787 [00:51<08:12,  8.82it/s]Token indices sequence length is longer than the specified maximum seq

 10%|█         | 501/4787 [00:57<08:17,  8.61it/s]Token indices sequence length is longer than the specified maximum sequence length for this model (759 > 512). Running this sequence through the model will result in indexing errors
Token indices sequence length is longer than the specified maximum sequence length for this model (762 > 512). Running this sequence through the model will result in indexing errors
 11%|█         | 503/4787 [00:58<08:11,  8.72it/s]Token indices sequence length is longer than the specified maximum sequence length for this model (522 > 512). Running this sequence through the model will result in indexing errors
 11%|█         | 504/4787 [00:58<08:13,  8.68it/s]Token indices sequence length is longer than the specified maximum sequence length for this model (825 > 512). Running this sequence through the model will result in indexing errors
 11%|█         | 505/4787 [00:58<08:13,  8.67it/s]Token indices sequence length is longer than the specified maximum seque

 12%|█▏        | 552/4787 [01:03<07:37,  9.26it/s]Token indices sequence length is longer than the specified maximum sequence length for this model (1208 > 512). Running this sequence through the model will result in indexing errors
 12%|█▏        | 553/4787 [01:03<07:38,  9.24it/s]Token indices sequence length is longer than the specified maximum sequence length for this model (585 > 512). Running this sequence through the model will result in indexing errors
Token indices sequence length is longer than the specified maximum sequence length for this model (656 > 512). Running this sequence through the model will result in indexing errors
 12%|█▏        | 558/4787 [01:04<07:38,  9.22it/s]Token indices sequence length is longer than the specified maximum sequence length for this model (573 > 512). Running this sequence through the model will result in indexing errors
 12%|█▏        | 560/4787 [01:04<07:49,  9.00it/s]Token indices sequence length is longer than the specified maximum sequ

 13%|█▎        | 601/4787 [01:09<07:59,  8.73it/s]Token indices sequence length is longer than the specified maximum sequence length for this model (887 > 512). Running this sequence through the model will result in indexing errors
Token indices sequence length is longer than the specified maximum sequence length for this model (629 > 512). Running this sequence through the model will result in indexing errors
Token indices sequence length is longer than the specified maximum sequence length for this model (666 > 512). Running this sequence through the model will result in indexing errors
Token indices sequence length is longer than the specified maximum sequence length for this model (688 > 512). Running this sequence through the model will result in indexing errors
 13%|█▎        | 606/4787 [01:09<07:38,  9.11it/s]Token indices sequence length is longer than the specified maximum sequence length for this model (940 > 512). Running this sequence through the model will result in indexi

 14%|█▍        | 662/4787 [01:16<07:25,  9.25it/s]Token indices sequence length is longer than the specified maximum sequence length for this model (905 > 512). Running this sequence through the model will result in indexing errors
 14%|█▍        | 664/4787 [01:16<07:30,  9.15it/s]Token indices sequence length is longer than the specified maximum sequence length for this model (1038 > 512). Running this sequence through the model will result in indexing errors
Token indices sequence length is longer than the specified maximum sequence length for this model (796 > 512). Running this sequence through the model will result in indexing errors
 14%|█▍        | 667/4787 [01:16<07:49,  8.77it/s]Token indices sequence length is longer than the specified maximum sequence length for this model (773 > 512). Running this sequence through the model will result in indexing errors
 14%|█▍        | 668/4787 [01:16<07:49,  8.77it/s]Token indices sequence length is longer than the specified maximum sequ

 15%|█▌        | 719/4787 [01:22<07:50,  8.64it/s]Token indices sequence length is longer than the specified maximum sequence length for this model (1272 > 512). Running this sequence through the model will result in indexing errors
 15%|█▌        | 720/4787 [01:22<07:53,  8.59it/s]Token indices sequence length is longer than the specified maximum sequence length for this model (1139 > 512). Running this sequence through the model will result in indexing errors
 15%|█▌        | 721/4787 [01:23<07:55,  8.55it/s]Token indices sequence length is longer than the specified maximum sequence length for this model (515 > 512). Running this sequence through the model will result in indexing errors
 15%|█▌        | 723/4787 [01:23<07:42,  8.78it/s]Token indices sequence length is longer than the specified maximum sequence length for this model (577 > 512). Running this sequence through the model will result in indexing errors
 15%|█▌        | 724/4787 [01:23<07:47,  8.69it/s]Token indices sequen

 16%|█▋        | 783/4787 [01:30<07:43,  8.64it/s]Token indices sequence length is longer than the specified maximum sequence length for this model (553 > 512). Running this sequence through the model will result in indexing errors
 16%|█▋        | 787/4787 [01:30<07:10,  9.29it/s]Token indices sequence length is longer than the specified maximum sequence length for this model (773 > 512). Running this sequence through the model will result in indexing errors
Token indices sequence length is longer than the specified maximum sequence length for this model (538 > 512). Running this sequence through the model will result in indexing errors
 16%|█▋        | 788/4787 [01:30<07:31,  8.85it/s]Token indices sequence length is longer than the specified maximum sequence length for this model (576 > 512). Running this sequence through the model will result in indexing errors
Token indices sequence length is longer than the specified maximum sequence length for this model (540 > 512). Running thi

 17%|█▋        | 831/4787 [01:35<07:30,  8.78it/s]Token indices sequence length is longer than the specified maximum sequence length for this model (913 > 512). Running this sequence through the model will result in indexing errors
 17%|█▋        | 832/4787 [01:35<07:32,  8.75it/s]Token indices sequence length is longer than the specified maximum sequence length for this model (840 > 512). Running this sequence through the model will result in indexing errors
 17%|█▋        | 834/4787 [01:36<07:26,  8.85it/s]Token indices sequence length is longer than the specified maximum sequence length for this model (590 > 512). Running this sequence through the model will result in indexing errors
Token indices sequence length is longer than the specified maximum sequence length for this model (594 > 512). Running this sequence through the model will result in indexing errors
 17%|█▋        | 835/4787 [01:36<07:34,  8.70it/s]Token indices sequence length is longer than the specified maximum seque

 18%|█▊        | 878/4787 [01:41<07:24,  8.80it/s]Token indices sequence length is longer than the specified maximum sequence length for this model (816 > 512). Running this sequence through the model will result in indexing errors
 18%|█▊        | 879/4787 [01:41<07:33,  8.61it/s]Token indices sequence length is longer than the specified maximum sequence length for this model (1665 > 512). Running this sequence through the model will result in indexing errors
 18%|█▊        | 881/4787 [01:41<07:21,  8.84it/s]Token indices sequence length is longer than the specified maximum sequence length for this model (633 > 512). Running this sequence through the model will result in indexing errors
 18%|█▊        | 882/4787 [01:41<07:21,  8.85it/s]Token indices sequence length is longer than the specified maximum sequence length for this model (741 > 512). Running this sequence through the model will result in indexing errors
 18%|█▊        | 883/4787 [01:41<07:14,  8.98it/s]Token indices sequenc

 19%|█▉        | 923/4787 [01:46<07:17,  8.82it/s]Token indices sequence length is longer than the specified maximum sequence length for this model (641 > 512). Running this sequence through the model will result in indexing errors
 19%|█▉        | 925/4787 [01:46<07:14,  8.89it/s]Token indices sequence length is longer than the specified maximum sequence length for this model (921 > 512). Running this sequence through the model will result in indexing errors
 19%|█▉        | 930/4787 [01:47<06:58,  9.22it/s]Token indices sequence length is longer than the specified maximum sequence length for this model (1015 > 512). Running this sequence through the model will result in indexing errors
Token indices sequence length is longer than the specified maximum sequence length for this model (971 > 512). Running this sequence through the model will result in indexing errors
Token indices sequence length is longer than the specified maximum sequence length for this model (833 > 512). Running th

 20%|██        | 974/4787 [01:52<07:00,  9.07it/s]Token indices sequence length is longer than the specified maximum sequence length for this model (518 > 512). Running this sequence through the model will result in indexing errors
 20%|██        | 976/4787 [01:52<07:04,  8.97it/s]Token indices sequence length is longer than the specified maximum sequence length for this model (642 > 512). Running this sequence through the model will result in indexing errors
 20%|██        | 980/4787 [01:52<06:59,  9.08it/s]Token indices sequence length is longer than the specified maximum sequence length for this model (1033 > 512). Running this sequence through the model will result in indexing errors
Token indices sequence length is longer than the specified maximum sequence length for this model (1305 > 512). Running this sequence through the model will result in indexing errors
 20%|██        | 981/4787 [01:53<07:29,  8.47it/s]Token indices sequence length is longer than the specified maximum seq

 22%|██▏       | 1043/4787 [02:00<06:58,  8.94it/s]Token indices sequence length is longer than the specified maximum sequence length for this model (968 > 512). Running this sequence through the model will result in indexing errors
Token indices sequence length is longer than the specified maximum sequence length for this model (933 > 512). Running this sequence through the model will result in indexing errors
 22%|██▏       | 1045/4787 [02:00<07:09,  8.72it/s]Token indices sequence length is longer than the specified maximum sequence length for this model (523 > 512). Running this sequence through the model will result in indexing errors
Token indices sequence length is longer than the specified maximum sequence length for this model (583 > 512). Running this sequence through the model will result in indexing errors
 22%|██▏       | 1046/4787 [02:00<07:14,  8.61it/s]Token indices sequence length is longer than the specified maximum sequence length for this model (608 > 512). Running 

 23%|██▎       | 1104/4787 [02:07<07:20,  8.36it/s]Token indices sequence length is longer than the specified maximum sequence length for this model (1765 > 512). Running this sequence through the model will result in indexing errors
Token indices sequence length is longer than the specified maximum sequence length for this model (931 > 512). Running this sequence through the model will result in indexing errors
Token indices sequence length is longer than the specified maximum sequence length for this model (1001 > 512). Running this sequence through the model will result in indexing errors
 23%|██▎       | 1105/4787 [02:07<07:46,  7.90it/s]Token indices sequence length is longer than the specified maximum sequence length for this model (771 > 512). Running this sequence through the model will result in indexing errors
 23%|██▎       | 1108/4787 [02:07<06:58,  8.80it/s]Token indices sequence length is longer than the specified maximum sequence length for this model (585 > 512). Runnin

 24%|██▍       | 1158/4787 [02:13<06:55,  8.74it/s]Token indices sequence length is longer than the specified maximum sequence length for this model (556 > 512). Running this sequence through the model will result in indexing errors
Token indices sequence length is longer than the specified maximum sequence length for this model (1027 > 512). Running this sequence through the model will result in indexing errors
Token indices sequence length is longer than the specified maximum sequence length for this model (682 > 512). Running this sequence through the model will result in indexing errors
 24%|██▍       | 1159/4787 [02:13<07:35,  7.96it/s]Token indices sequence length is longer than the specified maximum sequence length for this model (556 > 512). Running this sequence through the model will result in indexing errors
Token indices sequence length is longer than the specified maximum sequence length for this model (1015 > 512). Running this sequence through the model will result in in

 25%|██▌       | 1202/4787 [02:18<07:27,  8.01it/s]Token indices sequence length is longer than the specified maximum sequence length for this model (606 > 512). Running this sequence through the model will result in indexing errors
Token indices sequence length is longer than the specified maximum sequence length for this model (577 > 512). Running this sequence through the model will result in indexing errors
 25%|██▌       | 1203/4787 [02:18<07:28,  8.00it/s]Token indices sequence length is longer than the specified maximum sequence length for this model (1500 > 512). Running this sequence through the model will result in indexing errors
 25%|██▌       | 1204/4787 [02:18<07:36,  7.85it/s]Token indices sequence length is longer than the specified maximum sequence length for this model (814 > 512). Running this sequence through the model will result in indexing errors
 25%|██▌       | 1206/4787 [02:19<07:03,  8.46it/s]Token indices sequence length is longer than the specified maximum 

 26%|██▌       | 1247/4787 [02:23<06:41,  8.81it/s]Token indices sequence length is longer than the specified maximum sequence length for this model (684 > 512). Running this sequence through the model will result in indexing errors
 26%|██▌       | 1252/4787 [02:24<06:31,  9.04it/s]Token indices sequence length is longer than the specified maximum sequence length for this model (881 > 512). Running this sequence through the model will result in indexing errors
 26%|██▌       | 1254/4787 [02:24<06:36,  8.90it/s]Token indices sequence length is longer than the specified maximum sequence length for this model (737 > 512). Running this sequence through the model will result in indexing errors
Token indices sequence length is longer than the specified maximum sequence length for this model (855 > 512). Running this sequence through the model will result in indexing errors
 26%|██▌       | 1256/4787 [02:24<06:43,  8.76it/s]Token indices sequence length is longer than the specified maximum s

 28%|██▊       | 1325/4787 [02:32<06:50,  8.44it/s]Token indices sequence length is longer than the specified maximum sequence length for this model (524 > 512). Running this sequence through the model will result in indexing errors
 28%|██▊       | 1327/4787 [02:32<06:36,  8.73it/s]Token indices sequence length is longer than the specified maximum sequence length for this model (754 > 512). Running this sequence through the model will result in indexing errors
 28%|██▊       | 1329/4787 [02:33<06:31,  8.82it/s]Token indices sequence length is longer than the specified maximum sequence length for this model (513 > 512). Running this sequence through the model will result in indexing errors
 28%|██▊       | 1330/4787 [02:33<06:29,  8.89it/s]Token indices sequence length is longer than the specified maximum sequence length for this model (795 > 512). Running this sequence through the model will result in indexing errors
 28%|██▊       | 1332/4787 [02:33<06:41,  8.62it/s]Token indices seq

 29%|██▊       | 1373/4787 [02:38<06:47,  8.39it/s]Token indices sequence length is longer than the specified maximum sequence length for this model (540 > 512). Running this sequence through the model will result in indexing errors
 29%|██▊       | 1374/4787 [02:38<06:50,  8.32it/s]Token indices sequence length is longer than the specified maximum sequence length for this model (916 > 512). Running this sequence through the model will result in indexing errors
 29%|██▊       | 1375/4787 [02:38<06:57,  8.18it/s]Token indices sequence length is longer than the specified maximum sequence length for this model (740 > 512). Running this sequence through the model will result in indexing errors
 29%|██▉       | 1377/4787 [02:38<06:35,  8.61it/s]Token indices sequence length is longer than the specified maximum sequence length for this model (1135 > 512). Running this sequence through the model will result in indexing errors
 29%|██▉       | 1378/4787 [02:38<06:35,  8.61it/s]Token indices se

 30%|██▉       | 1423/4787 [02:44<06:15,  8.95it/s]Token indices sequence length is longer than the specified maximum sequence length for this model (612 > 512). Running this sequence through the model will result in indexing errors
 30%|██▉       | 1425/4787 [02:44<06:08,  9.13it/s]Token indices sequence length is longer than the specified maximum sequence length for this model (1304 > 512). Running this sequence through the model will result in indexing errors
 30%|██▉       | 1428/4787 [02:44<06:16,  8.92it/s]Token indices sequence length is longer than the specified maximum sequence length for this model (1116 > 512). Running this sequence through the model will result in indexing errors
Token indices sequence length is longer than the specified maximum sequence length for this model (1033 > 512). Running this sequence through the model will result in indexing errors
 30%|██▉       | 1432/4787 [02:45<06:06,  9.17it/s]Token indices sequence length is longer than the specified maximu

 31%|███       | 1473/4787 [02:49<06:29,  8.52it/s]Token indices sequence length is longer than the specified maximum sequence length for this model (900 > 512). Running this sequence through the model will result in indexing errors
 31%|███       | 1481/4787 [02:50<06:02,  9.11it/s]Token indices sequence length is longer than the specified maximum sequence length for this model (916 > 512). Running this sequence through the model will result in indexing errors
 31%|███       | 1482/4787 [02:50<06:07,  8.98it/s]Token indices sequence length is longer than the specified maximum sequence length for this model (576 > 512). Running this sequence through the model will result in indexing errors
 31%|███       | 1483/4787 [02:50<06:08,  8.96it/s]Token indices sequence length is longer than the specified maximum sequence length for this model (516 > 512). Running this sequence through the model will result in indexing errors
 31%|███       | 1486/4787 [02:51<06:20,  8.69it/s]Token indices seq

 32%|███▏      | 1533/4787 [02:56<06:24,  8.47it/s]Token indices sequence length is longer than the specified maximum sequence length for this model (623 > 512). Running this sequence through the model will result in indexing errors
Token indices sequence length is longer than the specified maximum sequence length for this model (924 > 512). Running this sequence through the model will result in indexing errors
 32%|███▏      | 1536/4787 [02:56<06:15,  8.65it/s]Token indices sequence length is longer than the specified maximum sequence length for this model (533 > 512). Running this sequence through the model will result in indexing errors
 32%|███▏      | 1537/4787 [02:57<06:10,  8.77it/s]Token indices sequence length is longer than the specified maximum sequence length for this model (974 > 512). Running this sequence through the model will result in indexing errors
Token indices sequence length is longer than the specified maximum sequence length for this model (3779 > 512). Running

 33%|███▎      | 1590/4787 [03:03<06:02,  8.82it/s]Token indices sequence length is longer than the specified maximum sequence length for this model (956 > 512). Running this sequence through the model will result in indexing errors
 33%|███▎      | 1591/4787 [03:03<06:03,  8.79it/s]Token indices sequence length is longer than the specified maximum sequence length for this model (865 > 512). Running this sequence through the model will result in indexing errors
 33%|███▎      | 1593/4787 [03:03<06:10,  8.61it/s]Token indices sequence length is longer than the specified maximum sequence length for this model (520 > 512). Running this sequence through the model will result in indexing errors
Token indices sequence length is longer than the specified maximum sequence length for this model (537 > 512). Running this sequence through the model will result in indexing errors
 33%|███▎      | 1595/4787 [03:03<06:06,  8.71it/s]Token indices sequence length is longer than the specified maximum s

 34%|███▍      | 1642/4787 [03:09<06:10,  8.48it/s]Token indices sequence length is longer than the specified maximum sequence length for this model (770 > 512). Running this sequence through the model will result in indexing errors
 34%|███▍      | 1643/4787 [03:09<06:04,  8.63it/s]Token indices sequence length is longer than the specified maximum sequence length for this model (1034 > 512). Running this sequence through the model will result in indexing errors
 34%|███▍      | 1644/4787 [03:09<06:06,  8.58it/s]Token indices sequence length is longer than the specified maximum sequence length for this model (522 > 512). Running this sequence through the model will result in indexing errors
Token indices sequence length is longer than the specified maximum sequence length for this model (643 > 512). Running this sequence through the model will result in indexing errors
 34%|███▍      | 1645/4787 [03:09<06:06,  8.58it/s]Token indices sequence length is longer than the specified maximum 

Token indices sequence length is longer than the specified maximum sequence length for this model (882 > 512). Running this sequence through the model will result in indexing errors
 35%|███▌      | 1687/4787 [03:14<05:43,  9.04it/s]Token indices sequence length is longer than the specified maximum sequence length for this model (1627 > 512). Running this sequence through the model will result in indexing errors
 35%|███▌      | 1689/4787 [03:14<06:01,  8.58it/s]Token indices sequence length is longer than the specified maximum sequence length for this model (922 > 512). Running this sequence through the model will result in indexing errors
Token indices sequence length is longer than the specified maximum sequence length for this model (923 > 512). Running this sequence through the model will result in indexing errors
Token indices sequence length is longer than the specified maximum sequence length for this model (708 > 512). Running this sequence through the model will result in ind

 36%|███▌      | 1727/4787 [03:19<06:26,  7.93it/s]Token indices sequence length is longer than the specified maximum sequence length for this model (1105 > 512). Running this sequence through the model will result in indexing errors
 36%|███▌      | 1728/4787 [03:19<06:15,  8.15it/s]Token indices sequence length is longer than the specified maximum sequence length for this model (1231 > 512). Running this sequence through the model will result in indexing errors
 36%|███▌      | 1730/4787 [03:19<05:59,  8.49it/s]Token indices sequence length is longer than the specified maximum sequence length for this model (598 > 512). Running this sequence through the model will result in indexing errors
 36%|███▌      | 1731/4787 [03:19<05:56,  8.57it/s]Token indices sequence length is longer than the specified maximum sequence length for this model (728 > 512). Running this sequence through the model will result in indexing errors
Token indices sequence length is longer than the specified maximum

 37%|███▋      | 1782/4787 [03:25<06:14,  8.03it/s]Token indices sequence length is longer than the specified maximum sequence length for this model (531 > 512). Running this sequence through the model will result in indexing errors
Token indices sequence length is longer than the specified maximum sequence length for this model (538 > 512). Running this sequence through the model will result in indexing errors
Token indices sequence length is longer than the specified maximum sequence length for this model (543 > 512). Running this sequence through the model will result in indexing errors
 37%|███▋      | 1785/4787 [03:26<06:00,  8.34it/s]Token indices sequence length is longer than the specified maximum sequence length for this model (544 > 512). Running this sequence through the model will result in indexing errors
 37%|███▋      | 1786/4787 [03:26<05:54,  8.47it/s]Token indices sequence length is longer than the specified maximum sequence length for this model (706 > 512). Running 

 38%|███▊      | 1837/4787 [03:31<05:37,  8.74it/s]Token indices sequence length is longer than the specified maximum sequence length for this model (550 > 512). Running this sequence through the model will result in indexing errors
 38%|███▊      | 1838/4787 [03:32<05:35,  8.78it/s]Token indices sequence length is longer than the specified maximum sequence length for this model (585 > 512). Running this sequence through the model will result in indexing errors
 38%|███▊      | 1839/4787 [03:32<05:30,  8.91it/s]Token indices sequence length is longer than the specified maximum sequence length for this model (626 > 512). Running this sequence through the model will result in indexing errors
Token indices sequence length is longer than the specified maximum sequence length for this model (595 > 512). Running this sequence through the model will result in indexing errors
Token indices sequence length is longer than the specified maximum sequence length for this model (806 > 512). Running 

Token indices sequence length is longer than the specified maximum sequence length for this model (606 > 512). Running this sequence through the model will result in indexing errors
 40%|███▉      | 1892/4787 [03:38<05:25,  8.88it/s]Token indices sequence length is longer than the specified maximum sequence length for this model (693 > 512). Running this sequence through the model will result in indexing errors
 40%|███▉      | 1894/4787 [03:38<05:20,  9.02it/s]Token indices sequence length is longer than the specified maximum sequence length for this model (700 > 512). Running this sequence through the model will result in indexing errors
 40%|███▉      | 1896/4787 [03:38<05:22,  8.96it/s]Token indices sequence length is longer than the specified maximum sequence length for this model (1642 > 512). Running this sequence through the model will result in indexing errors
 40%|███▉      | 1899/4787 [03:39<05:17,  9.11it/s]Token indices sequence length is longer than the specified maximum 

Token indices sequence length is longer than the specified maximum sequence length for this model (676 > 512). Running this sequence through the model will result in indexing errors
 41%|████      | 1944/4787 [03:44<05:25,  8.74it/s]Token indices sequence length is longer than the specified maximum sequence length for this model (1031 > 512). Running this sequence through the model will result in indexing errors
 41%|████      | 1945/4787 [03:44<05:32,  8.54it/s]Token indices sequence length is longer than the specified maximum sequence length for this model (1235 > 512). Running this sequence through the model will result in indexing errors
Token indices sequence length is longer than the specified maximum sequence length for this model (642 > 512). Running this sequence through the model will result in indexing errors
 41%|████      | 1947/4787 [03:44<05:37,  8.42it/s]Token indices sequence length is longer than the specified maximum sequence length for this model (647 > 512). Runnin

 42%|████▏     | 2003/4787 [03:51<05:00,  9.28it/s]Token indices sequence length is longer than the specified maximum sequence length for this model (2501 > 512). Running this sequence through the model will result in indexing errors
 42%|████▏     | 2004/4787 [03:51<05:15,  8.83it/s]Token indices sequence length is longer than the specified maximum sequence length for this model (596 > 512). Running this sequence through the model will result in indexing errors
 42%|████▏     | 2008/4787 [03:51<05:00,  9.24it/s]Token indices sequence length is longer than the specified maximum sequence length for this model (1014 > 512). Running this sequence through the model will result in indexing errors
 42%|████▏     | 2009/4787 [03:51<05:04,  9.13it/s]Token indices sequence length is longer than the specified maximum sequence length for this model (606 > 512). Running this sequence through the model will result in indexing errors
 42%|████▏     | 2010/4787 [03:51<05:09,  8.97it/s]Token indices s

 43%|████▎     | 2042/4787 [03:55<05:23,  8.50it/s]Token indices sequence length is longer than the specified maximum sequence length for this model (630 > 512). Running this sequence through the model will result in indexing errors
 43%|████▎     | 2043/4787 [03:55<05:21,  8.54it/s]Token indices sequence length is longer than the specified maximum sequence length for this model (777 > 512). Running this sequence through the model will result in indexing errors
 43%|████▎     | 2044/4787 [03:55<05:19,  8.58it/s]Token indices sequence length is longer than the specified maximum sequence length for this model (1009 > 512). Running this sequence through the model will result in indexing errors
 43%|████▎     | 2045/4787 [03:56<05:26,  8.41it/s]Token indices sequence length is longer than the specified maximum sequence length for this model (524 > 512). Running this sequence through the model will result in indexing errors
Token indices sequence length is longer than the specified maximum 

 43%|████▎     | 2082/4787 [04:00<05:23,  8.37it/s]Token indices sequence length is longer than the specified maximum sequence length for this model (572 > 512). Running this sequence through the model will result in indexing errors
 44%|████▎     | 2083/4787 [04:00<05:10,  8.72it/s]Token indices sequence length is longer than the specified maximum sequence length for this model (539 > 512). Running this sequence through the model will result in indexing errors
 44%|████▎     | 2086/4787 [04:00<05:02,  8.92it/s]Token indices sequence length is longer than the specified maximum sequence length for this model (979 > 512). Running this sequence through the model will result in indexing errors
Token indices sequence length is longer than the specified maximum sequence length for this model (583 > 512). Running this sequence through the model will result in indexing errors
 44%|████▎     | 2087/4787 [04:00<05:25,  8.31it/s]Token indices sequence length is longer than the specified maximum s

Token indices sequence length is longer than the specified maximum sequence length for this model (1034 > 512). Running this sequence through the model will result in indexing errors
 45%|████▍     | 2136/4787 [04:06<05:22,  8.23it/s]Token indices sequence length is longer than the specified maximum sequence length for this model (3867 > 512). Running this sequence through the model will result in indexing errors
Token indices sequence length is longer than the specified maximum sequence length for this model (1123 > 512). Running this sequence through the model will result in indexing errors
 45%|████▍     | 2139/4787 [04:07<05:12,  8.46it/s]Token indices sequence length is longer than the specified maximum sequence length for this model (679 > 512). Running this sequence through the model will result in indexing errors
 45%|████▍     | 2141/4787 [04:07<04:58,  8.86it/s]Token indices sequence length is longer than the specified maximum sequence length for this model (656 > 512). Runni

 46%|████▌     | 2198/4787 [04:13<04:42,  9.15it/s]Token indices sequence length is longer than the specified maximum sequence length for this model (600 > 512). Running this sequence through the model will result in indexing errors
 46%|████▌     | 2199/4787 [04:13<04:45,  9.06it/s]Token indices sequence length is longer than the specified maximum sequence length for this model (782 > 512). Running this sequence through the model will result in indexing errors
 46%|████▌     | 2206/4787 [04:14<04:35,  9.38it/s]Token indices sequence length is longer than the specified maximum sequence length for this model (594 > 512). Running this sequence through the model will result in indexing errors
 46%|████▌     | 2207/4787 [04:14<04:40,  9.21it/s]Token indices sequence length is longer than the specified maximum sequence length for this model (655 > 512). Running this sequence through the model will result in indexing errors
 46%|████▌     | 2208/4787 [04:14<04:49,  8.91it/s]Token indices seq

 47%|████▋     | 2254/4787 [04:20<04:57,  8.52it/s]Token indices sequence length is longer than the specified maximum sequence length for this model (659 > 512). Running this sequence through the model will result in indexing errors
Token indices sequence length is longer than the specified maximum sequence length for this model (933 > 512). Running this sequence through the model will result in indexing errors
 47%|████▋     | 2255/4787 [04:20<05:07,  8.24it/s]Token indices sequence length is longer than the specified maximum sequence length for this model (840 > 512). Running this sequence through the model will result in indexing errors
 47%|████▋     | 2261/4787 [04:20<04:43,  8.92it/s]Token indices sequence length is longer than the specified maximum sequence length for this model (543 > 512). Running this sequence through the model will result in indexing errors
 47%|████▋     | 2263/4787 [04:21<04:38,  9.08it/s]Token indices sequence length is longer than the specified maximum s

 48%|████▊     | 2303/4787 [04:25<04:47,  8.63it/s]Token indices sequence length is longer than the specified maximum sequence length for this model (932 > 512). Running this sequence through the model will result in indexing errors
 48%|████▊     | 2304/4787 [04:25<04:44,  8.72it/s]Token indices sequence length is longer than the specified maximum sequence length for this model (818 > 512). Running this sequence through the model will result in indexing errors
 48%|████▊     | 2305/4787 [04:26<04:57,  8.33it/s]Token indices sequence length is longer than the specified maximum sequence length for this model (529 > 512). Running this sequence through the model will result in indexing errors
Token indices sequence length is longer than the specified maximum sequence length for this model (695 > 512). Running this sequence through the model will result in indexing errors
 48%|████▊     | 2308/4787 [04:26<04:44,  8.70it/s]Token indices sequence length is longer than the specified maximum s

Token indices sequence length is longer than the specified maximum sequence length for this model (2306 > 512). Running this sequence through the model will result in indexing errors
 49%|████▉     | 2345/4787 [04:30<04:56,  8.23it/s]Token indices sequence length is longer than the specified maximum sequence length for this model (802 > 512). Running this sequence through the model will result in indexing errors
 49%|████▉     | 2346/4787 [04:30<04:50,  8.41it/s]Token indices sequence length is longer than the specified maximum sequence length for this model (591 > 512). Running this sequence through the model will result in indexing errors
 49%|████▉     | 2350/4787 [04:31<04:28,  9.07it/s]Token indices sequence length is longer than the specified maximum sequence length for this model (545 > 512). Running this sequence through the model will result in indexing errors
 49%|████▉     | 2352/4787 [04:31<04:33,  8.90it/s]Token indices sequence length is longer than the specified maximum 

 50%|████▉     | 2391/4787 [04:36<04:43,  8.45it/s]Token indices sequence length is longer than the specified maximum sequence length for this model (1692 > 512). Running this sequence through the model will result in indexing errors
 50%|█████     | 2394/4787 [04:36<04:30,  8.86it/s]Token indices sequence length is longer than the specified maximum sequence length for this model (1097 > 512). Running this sequence through the model will result in indexing errors
 50%|█████     | 2395/4787 [04:36<04:27,  8.94it/s]Token indices sequence length is longer than the specified maximum sequence length for this model (1245 > 512). Running this sequence through the model will result in indexing errors
 50%|█████     | 2398/4787 [04:36<04:37,  8.62it/s]Token indices sequence length is longer than the specified maximum sequence length for this model (663 > 512). Running this sequence through the model will result in indexing errors
 50%|█████     | 2405/4787 [04:37<04:21,  9.12it/s]Token indices 

 51%|█████▏    | 2456/4787 [04:43<04:19,  8.97it/s]Token indices sequence length is longer than the specified maximum sequence length for this model (1032 > 512). Running this sequence through the model will result in indexing errors
 51%|█████▏    | 2457/4787 [04:43<04:24,  8.80it/s]Token indices sequence length is longer than the specified maximum sequence length for this model (564 > 512). Running this sequence through the model will result in indexing errors
Token indices sequence length is longer than the specified maximum sequence length for this model (533 > 512). Running this sequence through the model will result in indexing errors
 51%|█████▏    | 2458/4787 [04:43<04:32,  8.54it/s]Token indices sequence length is longer than the specified maximum sequence length for this model (1409 > 512). Running this sequence through the model will result in indexing errors
Token indices sequence length is longer than the specified maximum sequence length for this model (552 > 512). Runnin

Token indices sequence length is longer than the specified maximum sequence length for this model (4808 > 512). Running this sequence through the model will result in indexing errors
 53%|█████▎    | 2517/4787 [04:50<04:12,  9.01it/s]Token indices sequence length is longer than the specified maximum sequence length for this model (806 > 512). Running this sequence through the model will result in indexing errors
Token indices sequence length is longer than the specified maximum sequence length for this model (627 > 512). Running this sequence through the model will result in indexing errors
 53%|█████▎    | 2518/4787 [04:50<04:21,  8.68it/s]Token indices sequence length is longer than the specified maximum sequence length for this model (664 > 512). Running this sequence through the model will result in indexing errors
Token indices sequence length is longer than the specified maximum sequence length for this model (549 > 512). Running this sequence through the model will result in ind

 54%|█████▎    | 2567/4787 [04:56<04:22,  8.46it/s]Token indices sequence length is longer than the specified maximum sequence length for this model (1006 > 512). Running this sequence through the model will result in indexing errors
Token indices sequence length is longer than the specified maximum sequence length for this model (1944 > 512). Running this sequence through the model will result in indexing errors
 54%|█████▎    | 2569/4787 [04:56<04:18,  8.58it/s]Token indices sequence length is longer than the specified maximum sequence length for this model (1056 > 512). Running this sequence through the model will result in indexing errors
Token indices sequence length is longer than the specified maximum sequence length for this model (621 > 512). Running this sequence through the model will result in indexing errors
 54%|█████▎    | 2571/4787 [04:56<04:15,  8.69it/s]Token indices sequence length is longer than the specified maximum sequence length for this model (1413 > 512). Runn

 55%|█████▍    | 2619/4787 [05:02<04:10,  8.64it/s]Token indices sequence length is longer than the specified maximum sequence length for this model (693 > 512). Running this sequence through the model will result in indexing errors
 55%|█████▍    | 2620/4787 [05:02<04:06,  8.78it/s]Token indices sequence length is longer than the specified maximum sequence length for this model (873 > 512). Running this sequence through the model will result in indexing errors
 55%|█████▍    | 2621/4787 [05:02<04:13,  8.54it/s]Token indices sequence length is longer than the specified maximum sequence length for this model (542 > 512). Running this sequence through the model will result in indexing errors
 55%|█████▍    | 2622/4787 [05:02<04:11,  8.62it/s]Token indices sequence length is longer than the specified maximum sequence length for this model (660 > 512). Running this sequence through the model will result in indexing errors
 55%|█████▍    | 2624/4787 [05:02<04:06,  8.79it/s]Token indices seq

 56%|█████▌    | 2663/4787 [05:07<04:32,  7.79it/s]Token indices sequence length is longer than the specified maximum sequence length for this model (1468 > 512). Running this sequence through the model will result in indexing errors
 56%|█████▌    | 2668/4787 [05:08<03:58,  8.87it/s]Token indices sequence length is longer than the specified maximum sequence length for this model (819 > 512). Running this sequence through the model will result in indexing errors
Token indices sequence length is longer than the specified maximum sequence length for this model (923 > 512). Running this sequence through the model will result in indexing errors
 56%|█████▌    | 2669/4787 [05:08<04:06,  8.60it/s]Token indices sequence length is longer than the specified maximum sequence length for this model (537 > 512). Running this sequence through the model will result in indexing errors
 56%|█████▌    | 2670/4787 [05:08<04:04,  8.65it/s]Token indices sequence length is longer than the specified maximum 

Token indices sequence length is longer than the specified maximum sequence length for this model (789 > 512). Running this sequence through the model will result in indexing errors
Token indices sequence length is longer than the specified maximum sequence length for this model (637 > 512). Running this sequence through the model will result in indexing errors
 57%|█████▋    | 2719/4787 [05:14<04:12,  8.19it/s]Token indices sequence length is longer than the specified maximum sequence length for this model (2794 > 512). Running this sequence through the model will result in indexing errors
 57%|█████▋    | 2722/4787 [05:14<03:57,  8.70it/s]Token indices sequence length is longer than the specified maximum sequence length for this model (1010 > 512). Running this sequence through the model will result in indexing errors
 57%|█████▋    | 2723/4787 [05:14<03:57,  8.68it/s]Token indices sequence length is longer than the specified maximum sequence length for this model (752 > 512). Runnin

 58%|█████▊    | 2755/4787 [05:18<03:51,  8.77it/s]Token indices sequence length is longer than the specified maximum sequence length for this model (672 > 512). Running this sequence through the model will result in indexing errors
 58%|█████▊    | 2756/4787 [05:18<03:50,  8.80it/s]Token indices sequence length is longer than the specified maximum sequence length for this model (883 > 512). Running this sequence through the model will result in indexing errors
 58%|█████▊    | 2757/4787 [05:18<03:49,  8.84it/s]Token indices sequence length is longer than the specified maximum sequence length for this model (665 > 512). Running this sequence through the model will result in indexing errors
 58%|█████▊    | 2758/4787 [05:18<03:50,  8.79it/s]Token indices sequence length is longer than the specified maximum sequence length for this model (1345 > 512). Running this sequence through the model will result in indexing errors
 58%|█████▊    | 2762/4787 [05:19<03:49,  8.81it/s]Token indices se

 59%|█████▉    | 2823/4787 [05:25<03:37,  9.02it/s]Token indices sequence length is longer than the specified maximum sequence length for this model (691 > 512). Running this sequence through the model will result in indexing errors
Token indices sequence length is longer than the specified maximum sequence length for this model (868 > 512). Running this sequence through the model will result in indexing errors
 59%|█████▉    | 2824/4787 [05:26<03:48,  8.61it/s]Token indices sequence length is longer than the specified maximum sequence length for this model (642 > 512). Running this sequence through the model will result in indexing errors
 59%|█████▉    | 2826/4787 [05:26<03:39,  8.92it/s]Token indices sequence length is longer than the specified maximum sequence length for this model (928 > 512). Running this sequence through the model will result in indexing errors
Token indices sequence length is longer than the specified maximum sequence length for this model (551 > 512). Running 

 60%|██████    | 2880/4787 [05:32<03:36,  8.79it/s]Token indices sequence length is longer than the specified maximum sequence length for this model (691 > 512). Running this sequence through the model will result in indexing errors
 60%|██████    | 2881/4787 [05:32<03:34,  8.87it/s]Token indices sequence length is longer than the specified maximum sequence length for this model (1250 > 512). Running this sequence through the model will result in indexing errors
Token indices sequence length is longer than the specified maximum sequence length for this model (984 > 512). Running this sequence through the model will result in indexing errors
 60%|██████    | 2883/4787 [05:32<03:42,  8.55it/s]Token indices sequence length is longer than the specified maximum sequence length for this model (984 > 512). Running this sequence through the model will result in indexing errors
 60%|██████    | 2886/4787 [05:33<03:34,  8.86it/s]Token indices sequence length is longer than the specified maximum 

Token indices sequence length is longer than the specified maximum sequence length for this model (1056 > 512). Running this sequence through the model will result in indexing errors
 61%|██████    | 2922/4787 [05:37<03:33,  8.73it/s]Token indices sequence length is longer than the specified maximum sequence length for this model (1361 > 512). Running this sequence through the model will result in indexing errors
 61%|██████    | 2923/4787 [05:37<03:37,  8.57it/s]Token indices sequence length is longer than the specified maximum sequence length for this model (1104 > 512). Running this sequence through the model will result in indexing errors
Token indices sequence length is longer than the specified maximum sequence length for this model (2365 > 512). Running this sequence through the model will result in indexing errors
 61%|██████    | 2924/4787 [05:37<03:45,  8.25it/s]Token indices sequence length is longer than the specified maximum sequence length for this model (880 > 512). Runn

 62%|██████▏   | 2970/4787 [05:42<03:22,  8.99it/s]Token indices sequence length is longer than the specified maximum sequence length for this model (1022 > 512). Running this sequence through the model will result in indexing errors
Token indices sequence length is longer than the specified maximum sequence length for this model (945 > 512). Running this sequence through the model will result in indexing errors
 62%|██████▏   | 2971/4787 [05:43<03:37,  8.36it/s]Token indices sequence length is longer than the specified maximum sequence length for this model (1271 > 512). Running this sequence through the model will result in indexing errors
 62%|██████▏   | 2973/4787 [05:43<03:30,  8.61it/s]Token indices sequence length is longer than the specified maximum sequence length for this model (902 > 512). Running this sequence through the model will result in indexing errors
 62%|██████▏   | 2976/4787 [05:43<03:26,  8.78it/s]Token indices sequence length is longer than the specified maximum

 63%|██████▎   | 3033/4787 [05:50<03:29,  8.36it/s]Token indices sequence length is longer than the specified maximum sequence length for this model (924 > 512). Running this sequence through the model will result in indexing errors
 63%|██████▎   | 3035/4787 [05:50<03:23,  8.59it/s]Token indices sequence length is longer than the specified maximum sequence length for this model (514 > 512). Running this sequence through the model will result in indexing errors
Token indices sequence length is longer than the specified maximum sequence length for this model (518 > 512). Running this sequence through the model will result in indexing errors
Token indices sequence length is longer than the specified maximum sequence length for this model (1160 > 512). Running this sequence through the model will result in indexing errors
 63%|██████▎   | 3036/4787 [05:50<03:28,  8.38it/s]Token indices sequence length is longer than the specified maximum sequence length for this model (1823 > 512). Runnin

 64%|██████▍   | 3085/4787 [05:56<03:08,  9.02it/s]Token indices sequence length is longer than the specified maximum sequence length for this model (693 > 512). Running this sequence through the model will result in indexing errors
Token indices sequence length is longer than the specified maximum sequence length for this model (579 > 512). Running this sequence through the model will result in indexing errors
 64%|██████▍   | 3086/4787 [05:56<03:10,  8.92it/s]Token indices sequence length is longer than the specified maximum sequence length for this model (1017 > 512). Running this sequence through the model will result in indexing errors
 64%|██████▍   | 3087/4787 [05:56<03:12,  8.84it/s]Token indices sequence length is longer than the specified maximum sequence length for this model (760 > 512). Running this sequence through the model will result in indexing errors
 65%|██████▍   | 3088/4787 [05:56<03:14,  8.75it/s]Token indices sequence length is longer than the specified maximum 

 65%|██████▌   | 3130/4787 [06:01<03:12,  8.62it/s]Token indices sequence length is longer than the specified maximum sequence length for this model (540 > 512). Running this sequence through the model will result in indexing errors
 65%|██████▌   | 3132/4787 [06:01<03:08,  8.77it/s]Token indices sequence length is longer than the specified maximum sequence length for this model (1274 > 512). Running this sequence through the model will result in indexing errors
 65%|██████▌   | 3133/4787 [06:01<03:16,  8.43it/s]Token indices sequence length is longer than the specified maximum sequence length for this model (1051 > 512). Running this sequence through the model will result in indexing errors
 65%|██████▌   | 3134/4787 [06:01<03:15,  8.46it/s]Token indices sequence length is longer than the specified maximum sequence length for this model (893 > 512). Running this sequence through the model will result in indexing errors
 66%|██████▌   | 3138/4787 [06:02<03:01,  9.11it/s]Token indices s

Token indices sequence length is longer than the specified maximum sequence length for this model (1007 > 512). Running this sequence through the model will result in indexing errors
 66%|██████▋   | 3181/4787 [06:07<03:08,  8.52it/s]Token indices sequence length is longer than the specified maximum sequence length for this model (668 > 512). Running this sequence through the model will result in indexing errors
Token indices sequence length is longer than the specified maximum sequence length for this model (681 > 512). Running this sequence through the model will result in indexing errors
Token indices sequence length is longer than the specified maximum sequence length for this model (808 > 512). Running this sequence through the model will result in indexing errors
 66%|██████▋   | 3182/4787 [06:07<03:11,  8.37it/s]Token indices sequence length is longer than the specified maximum sequence length for this model (1114 > 512). Running this sequence through the model will result in in

 67%|██████▋   | 3224/4787 [06:12<03:07,  8.33it/s]Token indices sequence length is longer than the specified maximum sequence length for this model (794 > 512). Running this sequence through the model will result in indexing errors
Token indices sequence length is longer than the specified maximum sequence length for this model (753 > 512). Running this sequence through the model will result in indexing errors
Token indices sequence length is longer than the specified maximum sequence length for this model (1781 > 512). Running this sequence through the model will result in indexing errors
 67%|██████▋   | 3225/4787 [06:12<03:24,  7.63it/s]Token indices sequence length is longer than the specified maximum sequence length for this model (1788 > 512). Running this sequence through the model will result in indexing errors
Token indices sequence length is longer than the specified maximum sequence length for this model (557 > 512). Running this sequence through the model will result in in

 69%|██████▊   | 3288/4787 [06:19<02:46,  9.00it/s]Token indices sequence length is longer than the specified maximum sequence length for this model (735 > 512). Running this sequence through the model will result in indexing errors
 69%|██████▊   | 3290/4787 [06:19<02:48,  8.89it/s]Token indices sequence length is longer than the specified maximum sequence length for this model (596 > 512). Running this sequence through the model will result in indexing errors
Token indices sequence length is longer than the specified maximum sequence length for this model (691 > 512). Running this sequence through the model will result in indexing errors
 69%|██████▊   | 3291/4787 [06:19<02:51,  8.74it/s]Token indices sequence length is longer than the specified maximum sequence length for this model (599 > 512). Running this sequence through the model will result in indexing errors
 69%|██████▉   | 3292/4787 [06:19<02:48,  8.87it/s]Token indices sequence length is longer than the specified maximum s

Token indices sequence length is longer than the specified maximum sequence length for this model (1652 > 512). Running this sequence through the model will result in indexing errors
 69%|██████▉   | 3326/4787 [06:23<02:46,  8.78it/s]Token indices sequence length is longer than the specified maximum sequence length for this model (679 > 512). Running this sequence through the model will result in indexing errors
Token indices sequence length is longer than the specified maximum sequence length for this model (735 > 512). Running this sequence through the model will result in indexing errors
 70%|██████▉   | 3329/4787 [06:24<02:42,  8.98it/s]Token indices sequence length is longer than the specified maximum sequence length for this model (810 > 512). Running this sequence through the model will result in indexing errors
 70%|██████▉   | 3330/4787 [06:24<02:44,  8.85it/s]Token indices sequence length is longer than the specified maximum sequence length for this model (1283 > 512). Runnin

 71%|███████   | 3378/4787 [06:29<02:53,  8.13it/s]Token indices sequence length is longer than the specified maximum sequence length for this model (891 > 512). Running this sequence through the model will result in indexing errors
Token indices sequence length is longer than the specified maximum sequence length for this model (586 > 512). Running this sequence through the model will result in indexing errors
 71%|███████   | 3380/4787 [06:30<02:47,  8.41it/s]Token indices sequence length is longer than the specified maximum sequence length for this model (3451 > 512). Running this sequence through the model will result in indexing errors
Token indices sequence length is longer than the specified maximum sequence length for this model (668 > 512). Running this sequence through the model will result in indexing errors
 71%|███████   | 3383/4787 [06:30<02:48,  8.34it/s]Token indices sequence length is longer than the specified maximum sequence length for this model (806 > 512). Running

Token indices sequence length is longer than the specified maximum sequence length for this model (658 > 512). Running this sequence through the model will result in indexing errors
 72%|███████▏  | 3429/4787 [06:35<02:42,  8.36it/s]Token indices sequence length is longer than the specified maximum sequence length for this model (954 > 512). Running this sequence through the model will result in indexing errors
 72%|███████▏  | 3431/4787 [06:36<02:33,  8.81it/s]Token indices sequence length is longer than the specified maximum sequence length for this model (910 > 512). Running this sequence through the model will result in indexing errors
 72%|███████▏  | 3432/4787 [06:36<02:37,  8.59it/s]Token indices sequence length is longer than the specified maximum sequence length for this model (1384 > 512). Running this sequence through the model will result in indexing errors
Token indices sequence length is longer than the specified maximum sequence length for this model (586 > 512). Running

 73%|███████▎  | 3479/4787 [06:41<02:26,  8.95it/s]Token indices sequence length is longer than the specified maximum sequence length for this model (1032 > 512). Running this sequence through the model will result in indexing errors
 73%|███████▎  | 3481/4787 [06:41<02:31,  8.64it/s]Token indices sequence length is longer than the specified maximum sequence length for this model (3780 > 512). Running this sequence through the model will result in indexing errors
Token indices sequence length is longer than the specified maximum sequence length for this model (855 > 512). Running this sequence through the model will result in indexing errors
Token indices sequence length is longer than the specified maximum sequence length for this model (760 > 512). Running this sequence through the model will result in indexing errors
 73%|███████▎  | 3482/4787 [06:41<02:42,  8.01it/s]Token indices sequence length is longer than the specified maximum sequence length for this model (1009 > 512). Runni

 74%|███████▍  | 3533/4787 [06:47<02:27,  8.47it/s]Token indices sequence length is longer than the specified maximum sequence length for this model (1068 > 512). Running this sequence through the model will result in indexing errors
Token indices sequence length is longer than the specified maximum sequence length for this model (1249 > 512). Running this sequence through the model will result in indexing errors
Token indices sequence length is longer than the specified maximum sequence length for this model (921 > 512). Running this sequence through the model will result in indexing errors
 74%|███████▍  | 3537/4787 [06:48<02:15,  9.21it/s]Token indices sequence length is longer than the specified maximum sequence length for this model (597 > 512). Running this sequence through the model will result in indexing errors
Token indices sequence length is longer than the specified maximum sequence length for this model (854 > 512). Running this sequence through the model will result in in

 75%|███████▍  | 3579/4787 [06:53<02:12,  9.09it/s]Token indices sequence length is longer than the specified maximum sequence length for this model (927 > 512). Running this sequence through the model will result in indexing errors
Token indices sequence length is longer than the specified maximum sequence length for this model (1209 > 512). Running this sequence through the model will result in indexing errors
 75%|███████▍  | 3580/4787 [06:53<02:16,  8.85it/s]Token indices sequence length is longer than the specified maximum sequence length for this model (1532 > 512). Running this sequence through the model will result in indexing errors
 75%|███████▍  | 3583/4787 [06:53<02:14,  8.96it/s]Token indices sequence length is longer than the specified maximum sequence length for this model (1909 > 512). Running this sequence through the model will result in indexing errors
 75%|███████▍  | 3584/4787 [06:53<02:17,  8.77it/s]Token indices sequence length is longer than the specified maximu

Token indices sequence length is longer than the specified maximum sequence length for this model (1909 > 512). Running this sequence through the model will result in indexing errors
 76%|███████▌  | 3625/4787 [06:58<02:16,  8.48it/s]Token indices sequence length is longer than the specified maximum sequence length for this model (2031 > 512). Running this sequence through the model will result in indexing errors
 76%|███████▌  | 3628/4787 [06:58<02:13,  8.67it/s]Token indices sequence length is longer than the specified maximum sequence length for this model (658 > 512). Running this sequence through the model will result in indexing errors
 76%|███████▌  | 3629/4787 [06:58<02:14,  8.62it/s]Token indices sequence length is longer than the specified maximum sequence length for this model (1019 > 512). Running this sequence through the model will result in indexing errors
Token indices sequence length is longer than the specified maximum sequence length for this model (645 > 512). Runni

 77%|███████▋  | 3669/4787 [07:03<02:13,  8.40it/s]Token indices sequence length is longer than the specified maximum sequence length for this model (607 > 512). Running this sequence through the model will result in indexing errors
 77%|███████▋  | 3670/4787 [07:03<02:10,  8.53it/s]Token indices sequence length is longer than the specified maximum sequence length for this model (1006 > 512). Running this sequence through the model will result in indexing errors
 77%|███████▋  | 3671/4787 [07:03<02:11,  8.51it/s]Token indices sequence length is longer than the specified maximum sequence length for this model (514 > 512). Running this sequence through the model will result in indexing errors
Token indices sequence length is longer than the specified maximum sequence length for this model (1025 > 512). Running this sequence through the model will result in indexing errors
 77%|███████▋  | 3673/4787 [07:04<02:09,  8.59it/s]Token indices sequence length is longer than the specified maximum

 78%|███████▊  | 3716/4787 [07:09<02:17,  7.78it/s]Token indices sequence length is longer than the specified maximum sequence length for this model (1036 > 512). Running this sequence through the model will result in indexing errors
 78%|███████▊  | 3717/4787 [07:09<02:16,  7.83it/s]Token indices sequence length is longer than the specified maximum sequence length for this model (1084 > 512). Running this sequence through the model will result in indexing errors
Token indices sequence length is longer than the specified maximum sequence length for this model (599 > 512). Running this sequence through the model will result in indexing errors
 78%|███████▊  | 3718/4787 [07:09<02:20,  7.64it/s]Token indices sequence length is longer than the specified maximum sequence length for this model (934 > 512). Running this sequence through the model will result in indexing errors
 78%|███████▊  | 3720/4787 [07:09<02:10,  8.17it/s]Token indices sequence length is longer than the specified maximum

 79%|███████▊  | 3758/4787 [07:14<01:56,  8.86it/s]Token indices sequence length is longer than the specified maximum sequence length for this model (1210 > 512). Running this sequence through the model will result in indexing errors
Token indices sequence length is longer than the specified maximum sequence length for this model (600 > 512). Running this sequence through the model will result in indexing errors
 79%|███████▊  | 3760/4787 [07:14<02:00,  8.53it/s]Token indices sequence length is longer than the specified maximum sequence length for this model (1076 > 512). Running this sequence through the model will result in indexing errors
 79%|███████▊  | 3761/4787 [07:14<01:59,  8.56it/s]Token indices sequence length is longer than the specified maximum sequence length for this model (1466 > 512). Running this sequence through the model will result in indexing errors
Token indices sequence length is longer than the specified maximum sequence length for this model (877 > 512). Runni

 79%|███████▉  | 3798/4787 [07:18<02:20,  7.03it/s]Token indices sequence length is longer than the specified maximum sequence length for this model (532 > 512). Running this sequence through the model will result in indexing errors
 79%|███████▉  | 3800/4787 [07:19<02:03,  8.00it/s]Token indices sequence length is longer than the specified maximum sequence length for this model (1268 > 512). Running this sequence through the model will result in indexing errors
 79%|███████▉  | 3801/4787 [07:19<02:04,  7.95it/s]Token indices sequence length is longer than the specified maximum sequence length for this model (1016 > 512). Running this sequence through the model will result in indexing errors
Token indices sequence length is longer than the specified maximum sequence length for this model (715 > 512). Running this sequence through the model will result in indexing errors
 80%|███████▉  | 3806/4787 [07:19<01:50,  8.91it/s]Token indices sequence length is longer than the specified maximum

 80%|████████  | 3850/4787 [07:24<01:51,  8.44it/s]Token indices sequence length is longer than the specified maximum sequence length for this model (641 > 512). Running this sequence through the model will result in indexing errors
 80%|████████  | 3851/4787 [07:24<01:51,  8.42it/s]Token indices sequence length is longer than the specified maximum sequence length for this model (1112 > 512). Running this sequence through the model will result in indexing errors
 80%|████████  | 3852/4787 [07:25<01:51,  8.39it/s]Token indices sequence length is longer than the specified maximum sequence length for this model (540 > 512). Running this sequence through the model will result in indexing errors
 80%|████████  | 3853/4787 [07:25<01:48,  8.60it/s]Token indices sequence length is longer than the specified maximum sequence length for this model (843 > 512). Running this sequence through the model will result in indexing errors
Token indices sequence length is longer than the specified maximum 

 81%|████████▏ | 3898/4787 [07:30<01:41,  8.79it/s]Token indices sequence length is longer than the specified maximum sequence length for this model (1874 > 512). Running this sequence through the model will result in indexing errors
 81%|████████▏ | 3899/4787 [07:30<01:45,  8.42it/s]Token indices sequence length is longer than the specified maximum sequence length for this model (992 > 512). Running this sequence through the model will result in indexing errors
 81%|████████▏ | 3900/4787 [07:30<01:47,  8.26it/s]Token indices sequence length is longer than the specified maximum sequence length for this model (550 > 512). Running this sequence through the model will result in indexing errors
Token indices sequence length is longer than the specified maximum sequence length for this model (690 > 512). Running this sequence through the model will result in indexing errors
Token indices sequence length is longer than the specified maximum sequence length for this model (751 > 512). Running

 82%|████████▏ | 3945/4787 [07:35<01:36,  8.71it/s]Token indices sequence length is longer than the specified maximum sequence length for this model (648 > 512). Running this sequence through the model will result in indexing errors
Token indices sequence length is longer than the specified maximum sequence length for this model (973 > 512). Running this sequence through the model will result in indexing errors
 82%|████████▏ | 3947/4787 [07:36<01:38,  8.57it/s]Token indices sequence length is longer than the specified maximum sequence length for this model (996 > 512). Running this sequence through the model will result in indexing errors
 82%|████████▏ | 3949/4787 [07:36<01:35,  8.76it/s]Token indices sequence length is longer than the specified maximum sequence length for this model (602 > 512). Running this sequence through the model will result in indexing errors
 83%|████████▎ | 3950/4787 [07:36<01:38,  8.53it/s]Token indices sequence length is longer than the specified maximum s

 84%|████████▎ | 3999/4787 [07:42<01:32,  8.54it/s]Token indices sequence length is longer than the specified maximum sequence length for this model (841 > 512). Running this sequence through the model will result in indexing errors
 84%|████████▎ | 4000/4787 [07:42<01:32,  8.52it/s]Token indices sequence length is longer than the specified maximum sequence length for this model (647 > 512). Running this sequence through the model will result in indexing errors
Token indices sequence length is longer than the specified maximum sequence length for this model (1098 > 512). Running this sequence through the model will result in indexing errors
 84%|████████▎ | 4001/4787 [07:42<01:33,  8.39it/s]Token indices sequence length is longer than the specified maximum sequence length for this model (974 > 512). Running this sequence through the model will result in indexing errors
Token indices sequence length is longer than the specified maximum sequence length for this model (583 > 512). Running

Token indices sequence length is longer than the specified maximum sequence length for this model (970 > 512). Running this sequence through the model will result in indexing errors
 85%|████████▍ | 4058/4787 [07:48<01:26,  8.43it/s]Token indices sequence length is longer than the specified maximum sequence length for this model (1017 > 512). Running this sequence through the model will result in indexing errors
 85%|████████▍ | 4059/4787 [07:48<01:27,  8.34it/s]Token indices sequence length is longer than the specified maximum sequence length for this model (527 > 512). Running this sequence through the model will result in indexing errors
 85%|████████▍ | 4061/4787 [07:49<01:25,  8.53it/s]Token indices sequence length is longer than the specified maximum sequence length for this model (813 > 512). Running this sequence through the model will result in indexing errors
 85%|████████▍ | 4062/4787 [07:49<01:24,  8.59it/s]Token indices sequence length is longer than the specified maximum 

Token indices sequence length is longer than the specified maximum sequence length for this model (972 > 512). Running this sequence through the model will result in indexing errors
 86%|████████▌ | 4102/4787 [07:53<01:23,  8.18it/s]Token indices sequence length is longer than the specified maximum sequence length for this model (1030 > 512). Running this sequence through the model will result in indexing errors
 86%|████████▌ | 4103/4787 [07:54<01:23,  8.20it/s]Token indices sequence length is longer than the specified maximum sequence length for this model (516 > 512). Running this sequence through the model will result in indexing errors
 86%|████████▌ | 4107/4787 [07:54<01:15,  9.00it/s]Token indices sequence length is longer than the specified maximum sequence length for this model (606 > 512). Running this sequence through the model will result in indexing errors
Token indices sequence length is longer than the specified maximum sequence length for this model (530 > 512). Running

 87%|████████▋ | 4164/4787 [08:00<01:08,  9.14it/s]Token indices sequence length is longer than the specified maximum sequence length for this model (1184 > 512). Running this sequence through the model will result in indexing errors
 87%|████████▋ | 4165/4787 [08:01<01:10,  8.84it/s]Token indices sequence length is longer than the specified maximum sequence length for this model (659 > 512). Running this sequence through the model will result in indexing errors
Token indices sequence length is longer than the specified maximum sequence length for this model (552 > 512). Running this sequence through the model will result in indexing errors
 87%|████████▋ | 4166/4787 [08:01<01:12,  8.60it/s]Token indices sequence length is longer than the specified maximum sequence length for this model (1054 > 512). Running this sequence through the model will result in indexing errors
Token indices sequence length is longer than the specified maximum sequence length for this model (808 > 512). Runnin

Token indices sequence length is longer than the specified maximum sequence length for this model (774 > 512). Running this sequence through the model will result in indexing errors
 88%|████████▊ | 4216/4787 [08:07<01:05,  8.77it/s]Token indices sequence length is longer than the specified maximum sequence length for this model (1082 > 512). Running this sequence through the model will result in indexing errors
 88%|████████▊ | 4219/4787 [08:07<01:02,  9.08it/s]Token indices sequence length is longer than the specified maximum sequence length for this model (589 > 512). Running this sequence through the model will result in indexing errors
Token indices sequence length is longer than the specified maximum sequence length for this model (754 > 512). Running this sequence through the model will result in indexing errors
 88%|████████▊ | 4221/4787 [08:07<01:04,  8.83it/s]Token indices sequence length is longer than the specified maximum sequence length for this model (1077 > 512). Runnin

 89%|████████▉ | 4273/4787 [08:13<00:57,  8.91it/s]Token indices sequence length is longer than the specified maximum sequence length for this model (531 > 512). Running this sequence through the model will result in indexing errors
 89%|████████▉ | 4277/4787 [08:13<00:56,  9.03it/s]Token indices sequence length is longer than the specified maximum sequence length for this model (582 > 512). Running this sequence through the model will result in indexing errors
 89%|████████▉ | 4278/4787 [08:14<00:57,  8.80it/s]Token indices sequence length is longer than the specified maximum sequence length for this model (1097 > 512). Running this sequence through the model will result in indexing errors
 89%|████████▉ | 4280/4787 [08:14<00:58,  8.68it/s]Token indices sequence length is longer than the specified maximum sequence length for this model (940 > 512). Running this sequence through the model will result in indexing errors
Token indices sequence length is longer than the specified maximum 

 90%|█████████ | 4330/4787 [08:20<00:50,  9.03it/s]Token indices sequence length is longer than the specified maximum sequence length for this model (1389 > 512). Running this sequence through the model will result in indexing errors
Token indices sequence length is longer than the specified maximum sequence length for this model (621 > 512). Running this sequence through the model will result in indexing errors
 90%|█████████ | 4332/4787 [08:20<00:52,  8.71it/s]Token indices sequence length is longer than the specified maximum sequence length for this model (575 > 512). Running this sequence through the model will result in indexing errors
 91%|█████████ | 4333/4787 [08:20<00:52,  8.68it/s]Token indices sequence length is longer than the specified maximum sequence length for this model (568 > 512). Running this sequence through the model will result in indexing errors
Token indices sequence length is longer than the specified maximum sequence length for this model (728 > 512). Running

 91%|█████████▏| 4378/4787 [08:25<00:45,  8.97it/s]Token indices sequence length is longer than the specified maximum sequence length for this model (903 > 512). Running this sequence through the model will result in indexing errors
Token indices sequence length is longer than the specified maximum sequence length for this model (738 > 512). Running this sequence through the model will result in indexing errors
 92%|█████████▏| 4382/4787 [08:26<00:44,  9.05it/s]Token indices sequence length is longer than the specified maximum sequence length for this model (645 > 512). Running this sequence through the model will result in indexing errors
 92%|█████████▏| 4385/4787 [08:26<00:44,  9.04it/s]Token indices sequence length is longer than the specified maximum sequence length for this model (519 > 512). Running this sequence through the model will result in indexing errors
Token indices sequence length is longer than the specified maximum sequence length for this model (810 > 512). Running 

 93%|█████████▎| 4430/4787 [08:31<00:40,  8.73it/s]Token indices sequence length is longer than the specified maximum sequence length for this model (1429 > 512). Running this sequence through the model will result in indexing errors
 93%|█████████▎| 4433/4787 [08:31<00:39,  8.91it/s]Token indices sequence length is longer than the specified maximum sequence length for this model (1627 > 512). Running this sequence through the model will result in indexing errors
 93%|█████████▎| 4435/4787 [08:32<00:40,  8.63it/s]Token indices sequence length is longer than the specified maximum sequence length for this model (1728 > 512). Running this sequence through the model will result in indexing errors
 93%|█████████▎| 4436/4787 [08:32<00:42,  8.24it/s]Token indices sequence length is longer than the specified maximum sequence length for this model (932 > 512). Running this sequence through the model will result in indexing errors
Token indices sequence length is longer than the specified maximu

 93%|█████████▎| 4474/4787 [08:36<00:38,  8.17it/s]Token indices sequence length is longer than the specified maximum sequence length for this model (652 > 512). Running this sequence through the model will result in indexing errors
Token indices sequence length is longer than the specified maximum sequence length for this model (1278 > 512). Running this sequence through the model will result in indexing errors
Token indices sequence length is longer than the specified maximum sequence length for this model (1801 > 512). Running this sequence through the model will result in indexing errors
 94%|█████████▎| 4477/4787 [08:37<00:37,  8.21it/s]Token indices sequence length is longer than the specified maximum sequence length for this model (687 > 512). Running this sequence through the model will result in indexing errors
 94%|█████████▎| 4478/4787 [08:37<00:37,  8.31it/s]Token indices sequence length is longer than the specified maximum sequence length for this model (521 > 512). Runnin

 94%|█████████▍| 4518/4787 [08:41<00:31,  8.41it/s]Token indices sequence length is longer than the specified maximum sequence length for this model (1592 > 512). Running this sequence through the model will result in indexing errors
Token indices sequence length is longer than the specified maximum sequence length for this model (946 > 512). Running this sequence through the model will result in indexing errors
 94%|█████████▍| 4519/4787 [08:42<00:32,  8.16it/s]Token indices sequence length is longer than the specified maximum sequence length for this model (680 > 512). Running this sequence through the model will result in indexing errors
 94%|█████████▍| 4520/4787 [08:42<00:31,  8.36it/s]Token indices sequence length is longer than the specified maximum sequence length for this model (1181 > 512). Running this sequence through the model will result in indexing errors
Token indices sequence length is longer than the specified maximum sequence length for this model (705 > 512). Runnin

 95%|█████████▌| 4562/4787 [08:47<00:27,  8.24it/s]Token indices sequence length is longer than the specified maximum sequence length for this model (1374 > 512). Running this sequence through the model will result in indexing errors
 95%|█████████▌| 4564/4787 [08:47<00:26,  8.46it/s]Token indices sequence length is longer than the specified maximum sequence length for this model (817 > 512). Running this sequence through the model will result in indexing errors
 95%|█████████▌| 4567/4787 [08:47<00:24,  8.90it/s]Token indices sequence length is longer than the specified maximum sequence length for this model (561 > 512). Running this sequence through the model will result in indexing errors
 95%|█████████▌| 4569/4787 [08:47<00:24,  9.03it/s]Token indices sequence length is longer than the specified maximum sequence length for this model (963 > 512). Running this sequence through the model will result in indexing errors
Token indices sequence length is longer than the specified maximum 

 96%|█████████▌| 4605/4787 [08:52<00:21,  8.33it/s]Token indices sequence length is longer than the specified maximum sequence length for this model (1286 > 512). Running this sequence through the model will result in indexing errors
 96%|█████████▌| 4607/4787 [08:52<00:21,  8.52it/s]Token indices sequence length is longer than the specified maximum sequence length for this model (892 > 512). Running this sequence through the model will result in indexing errors
 96%|█████████▋| 4610/4787 [08:52<00:19,  8.94it/s]Token indices sequence length is longer than the specified maximum sequence length for this model (1116 > 512). Running this sequence through the model will result in indexing errors
 96%|█████████▋| 4615/4787 [08:53<00:18,  9.09it/s]Token indices sequence length is longer than the specified maximum sequence length for this model (789 > 512). Running this sequence through the model will result in indexing errors
 96%|█████████▋| 4616/4787 [08:53<00:19,  8.83it/s]Token indices s

 98%|█████████▊| 4668/4787 [08:59<00:14,  8.24it/s]Token indices sequence length is longer than the specified maximum sequence length for this model (642 > 512). Running this sequence through the model will result in indexing errors
 98%|█████████▊| 4673/4787 [08:59<00:12,  9.22it/s]Token indices sequence length is longer than the specified maximum sequence length for this model (648 > 512). Running this sequence through the model will result in indexing errors
Token indices sequence length is longer than the specified maximum sequence length for this model (1086 > 512). Running this sequence through the model will result in indexing errors
Token indices sequence length is longer than the specified maximum sequence length for this model (657 > 512). Running this sequence through the model will result in indexing errors
 98%|█████████▊| 4674/4787 [08:59<00:13,  8.57it/s]Token indices sequence length is longer than the specified maximum sequence length for this model (1086 > 512). Runnin

 98%|█████████▊| 4712/4787 [09:04<00:08,  8.78it/s]Token indices sequence length is longer than the specified maximum sequence length for this model (672 > 512). Running this sequence through the model will result in indexing errors
Token indices sequence length is longer than the specified maximum sequence length for this model (664 > 512). Running this sequence through the model will result in indexing errors
 98%|█████████▊| 4713/4787 [09:04<00:08,  8.56it/s]Token indices sequence length is longer than the specified maximum sequence length for this model (660 > 512). Running this sequence through the model will result in indexing errors
 99%|█████████▊| 4716/4787 [09:04<00:08,  8.78it/s]Token indices sequence length is longer than the specified maximum sequence length for this model (1022 > 512). Running this sequence through the model will result in indexing errors
 99%|█████████▊| 4718/4787 [09:05<00:07,  8.98it/s]Token indices sequence length is longer than the specified maximum 

Token indices sequence length is longer than the specified maximum sequence length for this model (1534 > 512). Running this sequence through the model will result in indexing errors
Token indices sequence length is longer than the specified maximum sequence length for this model (995 > 512). Running this sequence through the model will result in indexing errors
Token indices sequence length is longer than the specified maximum sequence length for this model (632 > 512). Running this sequence through the model will result in indexing errors
 99%|█████████▉| 4759/4787 [09:09<00:03,  8.11it/s]Token indices sequence length is longer than the specified maximum sequence length for this model (2292 > 512). Running this sequence through the model will result in indexing errors
Token indices sequence length is longer than the specified maximum sequence length for this model (939 > 512). Running this sequence through the model will result in indexing errors
 99%|█████████▉| 4761/4787 [09:10<00:

In [15]:
submission.to_csv("submission.csv", index=False)