In [1]:
%cd /content/drive/MyDrive/IIP2/Assignments/Final/HyperParameterOptimization

/content/drive/MyDrive/IIP2/Assignments/Final/HyperParameterOptimization


In [None]:
# Installing the "Simple Transformer" library
!pip install simpletransformers

Collecting simpletransformers
  Downloading simpletransformers-0.64.5-py3-none-any.whl (250 kB)
[?25l     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/250.7 kB[0m [31m?[0m eta [36m-:--:--[0m[2K     [91m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m[90m╺[0m [32m245.8/250.7 kB[0m [31m8.3 MB/s[0m eta [36m0:00:01[0m[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m250.7/250.7 kB[0m [31m6.8 MB/s[0m eta [36m0:00:00[0m
Collecting datasets (from simpletransformers)
  Downloading datasets-2.16.1-py3-none-any.whl (507 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m507.1/507.1 kB[0m [31m14.0 MB/s[0m eta [36m0:00:00[0m
Collecting seqeval (from simpletransformers)
  Downloading seqeval-1.2.2.tar.gz (43 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m43.6/43.6 kB[0m [31m6.1 MB/s[0m eta [36m0:00:00[0m
[?25h  Preparing metadata (setup.py) ... [?25l[?25hdone
Collecting tensorboardx (from simpletransfor

In [None]:
# Importing the necessary libraries
import logging
import sklearn
import os
import requests
import json

In [None]:
# Getting training and testing datasets

os.mkdir('squad')
url = 'https://rajpurkar.github.io/SQuAD-explorer/dataset/'

for file in ['train-v2.0.json', 'dev-v2.0.json']:
  res = requests.get(f'{url}{file}') #make request
  with open(f'squad/{file}', 'wb') as f:
    for chunk in res.iter_content(chunk_size=4):
      f.write(chunk)

In [None]:
# Preparing a training dataset (130319 samples)

with open("squad/train-v2.0.json", "rb") as f:
  squad_dict = json.load(f)

train = []
for group in squad_dict['data']:
  train = train + group['paragraphs']

# Using a portion of the testing dataset to perform HyperParameter Optimization
train_data1 = train[0:80]

In [None]:
# Preparing an evaluation dataset (used during the training phase) (5448 samples)
with open("squad/dev-v2.0.json", "rb") as f:
  squad_dict = json.load(f)

eval = []
for i in range(17):
  eval = eval + squad_dict['data'][i]['paragraphs']

# Using a portion of the evaluation dataset to perform HyperParameter Optimization
eval_data1 = eval[0:70]

--------------------------------------
Configuring hyperparameter optimization through W&B Sweeps

In [None]:
import logging

import pandas as pd
import sklearn

import wandb
from simpletransformers.question_answering import (
    QuestionAnsweringModel,
    QuestionAnsweringArgs,
)

In [None]:
# Configuring the sweep

sweep_config = {
    # Specify the search strategy
    "method": "bayes",  # grid, random

    # Specify the metric to be optimized
    "metric": {"name": "train_loss", "goal": "minimize"},

    # Specify the hyperparameters and their values to explore
    "parameters": {
        "train_batch_size": {"values": [64, 128] },  # Discret values
        "learning_rate": {"min": 4e-5, "max": 4e-4}, # Range of values
    },
}

In [None]:
# Initialize a W&B sweep
sweep_id = wandb.sweep(sweep_config, project="HyperParameterFina")

logging.basicConfig(level=logging.INFO)
transformers_logger = logging.getLogger("transformers")
transformers_logger.setLevel(logging.WARNING)


In [None]:
# Set up the default configuration for the model

model_args = QuestionAnsweringArgs()
model_args.reprocess_input_data = True
model_args.overwrite_output_dir = True
model_args.evaluate_during_training = True
model_args.manual_seed = 4
model_args.use_multiprocessing = True
# For each exploration 3 epochs are performed
model_args.num_train_epochs = 3
model_args.eval_batch_size = 64
model_args.wandb_project = "HyperParameterFina"

In [None]:
# Set up the training function

def train1():
    # Initialize a new wandb run
    wandb.init()

    # Create a TransformerModel
    model = QuestionAnsweringModel(
        "roberta",
        "roberta-base",
        use_cuda=True,
        args=model_args,
        sweep_config=wandb.config,
    )

    # Train the model
    model.train_model(train_data=train_data1,eval_data=eval_data1)

    # Evaluate the model
    model.eval_model(eval_data=eval_data1)

    # Sync wandb
    wandb.join()

In [None]:
# Run the sweeps
wandb.agent(sweep_id, train1)

<IPython.core.display.Javascript object>

[34m[1mwandb[0m: Appending key for api.wandb.ai to your netrc file: /root/.netrc


Create sweep with ID: 69up2rlf
Sweep URL: https://wandb.ai/xhono/HyperParameterFina/sweeps/69up2rlf


[34m[1mwandb[0m: Agent Starting Run: soq18jjo with config:
[34m[1mwandb[0m: 	learning_rate: 0.00025759008553275317
[34m[1mwandb[0m: 	train_batch_size: 128
[34m[1mwandb[0m: Currently logged in as: [33mxhonihoxha53[0m ([33mxhono[0m). Use [1m`wandb login --relogin`[0m to force relogin


The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


config.json:   0%|          | 0.00/481 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/499M [00:00<?, ?B/s]

Some weights of RobertaForQuestionAnswering were not initialized from the model checkpoint at roberta-base and are newly initialized: ['qa_outputs.weight', 'qa_outputs.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


vocab.json:   0%|          | 0.00/899k [00:00<?, ?B/s]

merges.txt:   0%|          | 0.00/456k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/1.36M [00:00<?, ?B/s]

convert squad examples to features: 100%|██████████| 949/949 [00:03<00:00, 260.13it/s]
add example index and unique id: 100%|██████████| 949/949 [00:00<00:00, 355328.91it/s]


Epoch:   0%|          | 0/3 [00:00<?, ?it/s]



VBox(children=(Label(value='0.011 MB of 0.011 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

Running Epoch 0 of 3:   0%|          | 0/8 [00:00<?, ?it/s]


convert squad examples to features:   0%|          | 0/480 [00:00<?, ?it/s][A
convert squad examples to features: 100%|██████████| 480/480 [00:02<00:00, 166.72it/s]

add example index and unique id: 100%|██████████| 480/480 [00:00<00:00, 313788.33it/s]


Running Evaluation:   0%|          | 0/8 [00:00<?, ?it/s]

Running Epoch 1 of 3:   0%|          | 0/8 [00:00<?, ?it/s]


convert squad examples to features:   0%|          | 0/480 [00:00<?, ?it/s][A
convert squad examples to features: 100%|██████████| 480/480 [00:02<00:00, 185.96it/s]

add example index and unique id: 100%|██████████| 480/480 [00:00<00:00, 328214.20it/s]


Running Evaluation:   0%|          | 0/8 [00:00<?, ?it/s]

Running Epoch 2 of 3:   0%|          | 0/8 [00:00<?, ?it/s]


convert squad examples to features:   0%|          | 0/480 [00:00<?, ?it/s][A
convert squad examples to features: 100%|██████████| 480/480 [00:01<00:00, 261.65it/s]

add example index and unique id: 100%|██████████| 480/480 [00:00<00:00, 453438.27it/s]


Running Evaluation:   0%|          | 0/8 [00:00<?, ?it/s]

convert squad examples to features: 100%|██████████| 480/480 [00:01<00:00, 330.19it/s]
add example index and unique id: 100%|██████████| 480/480 [00:00<00:00, 267259.51it/s]


Running Evaluation:   0%|          | 0/8 [00:00<?, ?it/s]

VBox(children=(Label(value='0.001 MB of 0.015 MB uploaded\r'), FloatProgress(value=0.07658020182009534, max=1.…

0,1
correct,█▃▁
eval_loss,█▃▁
global_step,▁▅█
incorrect,▁▆█
similar,▁▇█
train_loss,█▄▁

0,1
correct,148.0
eval_loss,-5.8125
global_step,24.0
incorrect,51.0
similar,281.0
train_loss,1.66551


[34m[1mwandb[0m: Agent Starting Run: x4vwux84 with config:
[34m[1mwandb[0m: 	learning_rate: 0.0002695173662732904
[34m[1mwandb[0m: 	train_batch_size: 128


Some weights of RobertaForQuestionAnswering were not initialized from the model checkpoint at roberta-base and are newly initialized: ['qa_outputs.weight', 'qa_outputs.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
convert squad examples to features: 100%|██████████| 949/949 [00:03<00:00, 265.98it/s]
add example index and unique id: 100%|██████████| 949/949 [00:00<00:00, 492014.15it/s]


Epoch:   0%|          | 0/3 [00:00<?, ?it/s]



VBox(children=(Label(value='0.001 MB of 0.011 MB uploaded\r'), FloatProgress(value=0.1081388233237171, max=1.0…

Running Epoch 0 of 3:   0%|          | 0/8 [00:00<?, ?it/s]


convert squad examples to features:   0%|          | 0/480 [00:00<?, ?it/s][A
convert squad examples to features: 100%|██████████| 480/480 [00:01<00:00, 263.17it/s]

add example index and unique id: 100%|██████████| 480/480 [00:00<00:00, 261259.53it/s]


Running Evaluation:   0%|          | 0/8 [00:00<?, ?it/s]

Running Epoch 1 of 3:   0%|          | 0/8 [00:00<?, ?it/s]


convert squad examples to features:   0%|          | 0/480 [00:00<?, ?it/s][A
convert squad examples to features: 100%|██████████| 480/480 [00:02<00:00, 161.81it/s]

add example index and unique id: 100%|██████████| 480/480 [00:00<00:00, 323728.24it/s]


Running Evaluation:   0%|          | 0/8 [00:00<?, ?it/s]

Running Epoch 2 of 3:   0%|          | 0/8 [00:00<?, ?it/s]


convert squad examples to features:   0%|          | 0/480 [00:00<?, ?it/s][A
convert squad examples to features: 100%|██████████| 480/480 [00:02<00:00, 236.83it/s]

add example index and unique id: 100%|██████████| 480/480 [00:00<00:00, 435677.54it/s]


Running Evaluation:   0%|          | 0/8 [00:00<?, ?it/s]

convert squad examples to features: 100%|██████████| 480/480 [00:02<00:00, 160.21it/s]
add example index and unique id: 100%|██████████| 480/480 [00:00<00:00, 420833.18it/s]


Running Evaluation:   0%|          | 0/8 [00:00<?, ?it/s]

VBox(children=(Label(value='0.006 MB of 0.016 MB uploaded\r'), FloatProgress(value=0.3717354161581645, max=1.0…

0,1
correct,█▂▁
eval_loss,█▁▁
global_step,▁▅█
incorrect,▄▁█
similar,▁██
train_loss,█▄▁

0,1
correct,88.0
eval_loss,-5.4624
global_step,24.0
incorrect,63.0
similar,329.0
train_loss,1.37525


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: c24iaye3 with config:
[34m[1mwandb[0m: 	learning_rate: 7.746015471297475e-05
[34m[1mwandb[0m: 	train_batch_size: 64


Some weights of RobertaForQuestionAnswering were not initialized from the model checkpoint at roberta-base and are newly initialized: ['qa_outputs.weight', 'qa_outputs.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
convert squad examples to features: 100%|██████████| 949/949 [00:04<00:00, 216.13it/s]
add example index and unique id: 100%|██████████| 949/949 [00:00<00:00, 477437.27it/s]


Epoch:   0%|          | 0/3 [00:00<?, ?it/s]



VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

Running Epoch 0 of 3:   0%|          | 0/16 [00:00<?, ?it/s]


convert squad examples to features:   0%|          | 0/480 [00:00<?, ?it/s][A
convert squad examples to features: 100%|██████████| 480/480 [00:01<00:00, 246.87it/s]

add example index and unique id: 100%|██████████| 480/480 [00:00<00:00, 367116.32it/s]


Running Evaluation:   0%|          | 0/8 [00:00<?, ?it/s]

Running Epoch 1 of 3:   0%|          | 0/16 [00:00<?, ?it/s]


convert squad examples to features:   0%|          | 0/480 [00:00<?, ?it/s][A
convert squad examples to features: 100%|██████████| 480/480 [00:04<00:00, 102.69it/s]

add example index and unique id: 100%|██████████| 480/480 [00:00<00:00, 251375.44it/s]


Running Evaluation:   0%|          | 0/8 [00:00<?, ?it/s]

Running Epoch 2 of 3:   0%|          | 0/16 [00:00<?, ?it/s]


convert squad examples to features:   0%|          | 0/480 [00:00<?, ?it/s][A
convert squad examples to features: 100%|██████████| 480/480 [00:01<00:00, 266.59it/s]

add example index and unique id: 100%|██████████| 480/480 [00:00<00:00, 333874.95it/s]


Running Evaluation:   0%|          | 0/8 [00:00<?, ?it/s]

convert squad examples to features: 100%|██████████| 480/480 [00:02<00:00, 234.52it/s]
add example index and unique id: 100%|██████████| 480/480 [00:00<00:00, 207467.63it/s]


Running Evaluation:   0%|          | 0/8 [00:00<?, ?it/s]

VBox(children=(Label(value='0.001 MB of 0.015 MB uploaded\r'), FloatProgress(value=0.07746346358903594, max=1.…

0,1
correct,█▁▆
eval_loss,█▅▁
global_step,▁▅█
incorrect,▁█▄
similar,▁█▁
train_loss,█▇▁

0,1
correct,102.0
eval_loss,-4.76025
global_step,48.0
incorrect,58.0
similar,320.0
train_loss,1.65166


[34m[1mwandb[0m: Agent Starting Run: 165yce7z with config:
[34m[1mwandb[0m: 	learning_rate: 0.00024731154497801563
[34m[1mwandb[0m: 	train_batch_size: 128


Some weights of RobertaForQuestionAnswering were not initialized from the model checkpoint at roberta-base and are newly initialized: ['qa_outputs.weight', 'qa_outputs.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
convert squad examples to features: 100%|██████████| 949/949 [00:06<00:00, 158.16it/s]
add example index and unique id: 100%|██████████| 949/949 [00:00<00:00, 206270.12it/s]


Epoch:   0%|          | 0/3 [00:00<?, ?it/s]



VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

Running Epoch 0 of 3:   0%|          | 0/8 [00:00<?, ?it/s]


convert squad examples to features:   0%|          | 0/480 [00:00<?, ?it/s][A
convert squad examples to features: 100%|██████████| 480/480 [00:01<00:00, 286.12it/s]

add example index and unique id: 100%|██████████| 480/480 [00:00<00:00, 376945.50it/s]


Running Evaluation:   0%|          | 0/8 [00:00<?, ?it/s]

Running Epoch 1 of 3:   0%|          | 0/8 [00:00<?, ?it/s]


convert squad examples to features:   0%|          | 0/480 [00:00<?, ?it/s][A
convert squad examples to features: 100%|██████████| 480/480 [00:02<00:00, 185.69it/s]

add example index and unique id: 100%|██████████| 480/480 [00:00<00:00, 110594.70it/s]


Running Evaluation:   0%|          | 0/8 [00:00<?, ?it/s]

Running Epoch 2 of 3:   0%|          | 0/8 [00:00<?, ?it/s]


convert squad examples to features:   0%|          | 0/480 [00:00<?, ?it/s][A
convert squad examples to features: 100%|██████████| 480/480 [00:03<00:00, 134.49it/s]

add example index and unique id: 100%|██████████| 480/480 [00:00<00:00, 67967.52it/s]


Running Evaluation:   0%|          | 0/8 [00:00<?, ?it/s]

convert squad examples to features: 100%|██████████| 480/480 [00:02<00:00, 235.84it/s]
add example index and unique id: 100%|██████████| 480/480 [00:00<00:00, 349768.23it/s]


Running Evaluation:   0%|          | 0/8 [00:00<?, ?it/s]

VBox(children=(Label(value='0.001 MB of 0.016 MB uploaded\r'), FloatProgress(value=0.07608093978719478, max=1.…

0,1
correct,█▁▂
eval_loss,█▂▁
global_step,▁▅█
incorrect,▁█▆
similar,▁██
train_loss,█▅▁

0,1
correct,109.0
eval_loss,-4.57031
global_step,24.0
incorrect,51.0
similar,320.0
train_loss,1.5703


[34m[1mwandb[0m: Agent Starting Run: uaxpi9r1 with config:
[34m[1mwandb[0m: 	learning_rate: 8.002112179876072e-05
[34m[1mwandb[0m: 	train_batch_size: 128


Some weights of RobertaForQuestionAnswering were not initialized from the model checkpoint at roberta-base and are newly initialized: ['qa_outputs.weight', 'qa_outputs.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
convert squad examples to features: 100%|██████████| 949/949 [00:03<00:00, 256.69it/s]
add example index and unique id: 100%|██████████| 949/949 [00:00<00:00, 420138.75it/s]


Epoch:   0%|          | 0/3 [00:00<?, ?it/s]



VBox(children=(Label(value='0.001 MB of 0.011 MB uploaded\r'), FloatProgress(value=0.10781577110956037, max=1.…

Running Epoch 0 of 3:   0%|          | 0/8 [00:00<?, ?it/s]


convert squad examples to features:   0%|          | 0/480 [00:00<?, ?it/s][A
convert squad examples to features: 100%|██████████| 480/480 [00:02<00:00, 198.15it/s]

add example index and unique id: 100%|██████████| 480/480 [00:00<00:00, 147265.45it/s]


Running Evaluation:   0%|          | 0/8 [00:00<?, ?it/s]

Running Epoch 1 of 3:   0%|          | 0/8 [00:00<?, ?it/s]


convert squad examples to features:   0%|          | 0/480 [00:00<?, ?it/s][A
convert squad examples to features: 100%|██████████| 480/480 [00:02<00:00, 229.83it/s]

add example index and unique id: 100%|██████████| 480/480 [00:00<00:00, 316203.22it/s]


Running Evaluation:   0%|          | 0/8 [00:00<?, ?it/s]

Running Epoch 2 of 3:   0%|          | 0/8 [00:00<?, ?it/s]


convert squad examples to features:   0%|          | 0/480 [00:00<?, ?it/s][A
convert squad examples to features: 100%|██████████| 480/480 [00:01<00:00, 240.28it/s]

add example index and unique id: 100%|██████████| 480/480 [00:00<00:00, 228468.67it/s]


Running Evaluation:   0%|          | 0/8 [00:00<?, ?it/s]

convert squad examples to features: 100%|██████████| 480/480 [00:02<00:00, 180.05it/s]
add example index and unique id: 100%|██████████| 480/480 [00:00<00:00, 373241.74it/s]


Running Evaluation:   0%|          | 0/8 [00:00<?, ?it/s]

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
correct,▁█▄
eval_loss,█▁▁
global_step,▁▅█
incorrect,█▁▃
similar,▃▁█
train_loss,█▄▁

0,1
correct,95.0
eval_loss,-3.59668
global_step,24.0
incorrect,55.0
similar,330.0
train_loss,2.10918


[34m[1mwandb[0m: Agent Starting Run: 79s2cwh1 with config:
[34m[1mwandb[0m: 	learning_rate: 0.00022726504332235467
[34m[1mwandb[0m: 	train_batch_size: 128


Some weights of RobertaForQuestionAnswering were not initialized from the model checkpoint at roberta-base and are newly initialized: ['qa_outputs.weight', 'qa_outputs.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
convert squad examples to features: 100%|██████████| 949/949 [00:07<00:00, 130.37it/s]
add example index and unique id: 100%|██████████| 949/949 [00:00<00:00, 159913.00it/s]


Epoch:   0%|          | 0/3 [00:00<?, ?it/s]



VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

Running Epoch 0 of 3:   0%|          | 0/8 [00:00<?, ?it/s]


convert squad examples to features:   0%|          | 0/480 [00:00<?, ?it/s][A
convert squad examples to features: 100%|██████████| 480/480 [00:01<00:00, 249.46it/s]

add example index and unique id: 100%|██████████| 480/480 [00:00<00:00, 145888.83it/s]


Running Evaluation:   0%|          | 0/8 [00:00<?, ?it/s]

Running Epoch 1 of 3:   0%|          | 0/8 [00:00<?, ?it/s]


convert squad examples to features:   0%|          | 0/480 [00:00<?, ?it/s][A
convert squad examples to features: 100%|██████████| 480/480 [00:01<00:00, 254.38it/s]

add example index and unique id: 100%|██████████| 480/480 [00:00<00:00, 456730.02it/s]


Running Evaluation:   0%|          | 0/8 [00:00<?, ?it/s]

Running Epoch 2 of 3:   0%|          | 0/8 [00:00<?, ?it/s]


convert squad examples to features:   0%|          | 0/480 [00:00<?, ?it/s][A
convert squad examples to features: 100%|██████████| 480/480 [00:03<00:00, 148.11it/s]

add example index and unique id: 100%|██████████| 480/480 [00:00<00:00, 320839.19it/s]


Running Evaluation:   0%|          | 0/8 [00:00<?, ?it/s]

convert squad examples to features: 100%|██████████| 480/480 [00:02<00:00, 213.80it/s]
add example index and unique id: 100%|██████████| 480/480 [00:00<00:00, 289720.24it/s]


Running Evaluation:   0%|          | 0/8 [00:00<?, ?it/s]

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
correct,█▁▄
eval_loss,█▇▁
global_step,▁▅█
incorrect,▁█▃
similar,▁█▆
train_loss,█▄▁

0,1
correct,149.0
eval_loss,-4.51465
global_step,24.0
incorrect,34.0
similar,297.0
train_loss,1.48456


[34m[1mwandb[0m: Agent Starting Run: eotz7o56 with config:
[34m[1mwandb[0m: 	learning_rate: 0.00037858989026500714
[34m[1mwandb[0m: 	train_batch_size: 128


Some weights of RobertaForQuestionAnswering were not initialized from the model checkpoint at roberta-base and are newly initialized: ['qa_outputs.weight', 'qa_outputs.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
convert squad examples to features: 100%|██████████| 949/949 [00:05<00:00, 176.02it/s]
add example index and unique id: 100%|██████████| 949/949 [00:00<00:00, 180377.69it/s]


Epoch:   0%|          | 0/3 [00:00<?, ?it/s]



VBox(children=(Label(value='0.011 MB of 0.011 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

Running Epoch 0 of 3:   0%|          | 0/8 [00:00<?, ?it/s]


convert squad examples to features:   0%|          | 0/480 [00:00<?, ?it/s][A
convert squad examples to features: 100%|██████████| 480/480 [00:01<00:00, 262.21it/s]

add example index and unique id: 100%|██████████| 480/480 [00:00<00:00, 353514.65it/s]


Running Evaluation:   0%|          | 0/8 [00:00<?, ?it/s]

Running Epoch 1 of 3:   0%|          | 0/8 [00:00<?, ?it/s]


convert squad examples to features:   0%|          | 0/480 [00:00<?, ?it/s][A
convert squad examples to features: 100%|██████████| 480/480 [00:01<00:00, 260.96it/s]

add example index and unique id: 100%|██████████| 480/480 [00:00<00:00, 485475.26it/s]


Running Evaluation:   0%|          | 0/8 [00:00<?, ?it/s]

Running Epoch 2 of 3:   0%|          | 0/8 [00:00<?, ?it/s]


convert squad examples to features:   0%|          | 0/480 [00:00<?, ?it/s][A
convert squad examples to features: 100%|██████████| 480/480 [00:03<00:00, 150.34it/s]

add example index and unique id: 100%|██████████| 480/480 [00:00<00:00, 294552.44it/s]


Running Evaluation:   0%|          | 0/8 [00:00<?, ?it/s]

convert squad examples to features: 100%|██████████| 480/480 [00:01<00:00, 253.54it/s]
add example index and unique id: 100%|██████████| 480/480 [00:00<00:00, 430276.97it/s]


Running Evaluation:   0%|          | 0/8 [00:00<?, ?it/s]

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
correct,▅▁█
eval_loss,█▃▁
global_step,▁▅█
incorrect,▅█▁
similar,▃█▁
train_loss,█▅▁

0,1
correct,255.0
eval_loss,-4.90674
global_step,24.0
incorrect,0.0
similar,225.0
train_loss,2.65719


[34m[1mwandb[0m: Agent Starting Run: rbuu18tj with config:
[34m[1mwandb[0m: 	learning_rate: 0.0003230607218329973
[34m[1mwandb[0m: 	train_batch_size: 128


Some weights of RobertaForQuestionAnswering were not initialized from the model checkpoint at roberta-base and are newly initialized: ['qa_outputs.weight', 'qa_outputs.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
convert squad examples to features: 100%|██████████| 949/949 [00:05<00:00, 175.75it/s]
add example index and unique id: 100%|██████████| 949/949 [00:00<00:00, 365677.03it/s]


Epoch:   0%|          | 0/3 [00:00<?, ?it/s]



VBox(children=(Label(value='0.001 MB of 0.010 MB uploaded\r'), FloatProgress(value=0.11411439114391143, max=1.…

VBox(children=(Label(value='Waiting for wandb.init()...\r'), FloatProgress(value=0.011112698766666856, max=1.0…

Running Epoch 0 of 3:   0%|          | 0/8 [00:00<?, ?it/s]


convert squad examples to features:   0%|          | 0/480 [00:00<?, ?it/s][A
convert squad examples to features: 100%|██████████| 480/480 [00:02<00:00, 174.20it/s]

add example index and unique id: 100%|██████████| 480/480 [00:00<00:00, 427808.31it/s]


Running Evaluation:   0%|          | 0/8 [00:00<?, ?it/s]

Running Epoch 1 of 3:   0%|          | 0/8 [00:00<?, ?it/s]


convert squad examples to features:   0%|          | 0/480 [00:00<?, ?it/s][A
convert squad examples to features: 100%|██████████| 480/480 [00:03<00:00, 143.89it/s]

add example index and unique id: 100%|██████████| 480/480 [00:00<00:00, 281812.14it/s]


Running Evaluation:   0%|          | 0/8 [00:00<?, ?it/s]

Running Epoch 2 of 3:   0%|          | 0/8 [00:00<?, ?it/s]


convert squad examples to features:   0%|          | 0/480 [00:00<?, ?it/s][A
convert squad examples to features: 100%|██████████| 480/480 [00:01<00:00, 240.95it/s]

add example index and unique id: 100%|██████████| 480/480 [00:00<00:00, 311989.14it/s]


Running Evaluation:   0%|          | 0/8 [00:00<?, ?it/s]

convert squad examples to features: 100%|██████████| 480/480 [00:03<00:00, 142.55it/s]
add example index and unique id: 100%|██████████| 480/480 [00:00<00:00, 285326.80it/s]


Running Evaluation:   0%|          | 0/8 [00:00<?, ?it/s]

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
correct,▁██
eval_loss,█▄▁
global_step,▁▅█
incorrect,█▂▁
similar,█▁▁
train_loss,█▂▁

0,1
correct,254.0
eval_loss,-4.50732
global_step,24.0
incorrect,1.0
similar,225.0
train_loss,2.63166


[34m[1mwandb[0m: Agent Starting Run: ehvi3bgn with config:
[34m[1mwandb[0m: 	learning_rate: 6.677235089882928e-05
[34m[1mwandb[0m: 	train_batch_size: 128


Some weights of RobertaForQuestionAnswering were not initialized from the model checkpoint at roberta-base and are newly initialized: ['qa_outputs.weight', 'qa_outputs.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
convert squad examples to features: 100%|██████████| 949/949 [00:06<00:00, 147.20it/s]
add example index and unique id: 100%|██████████| 949/949 [00:00<00:00, 397880.30it/s]


Epoch:   0%|          | 0/3 [00:00<?, ?it/s]



VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

Running Epoch 0 of 3:   0%|          | 0/8 [00:00<?, ?it/s]


convert squad examples to features:   0%|          | 0/480 [00:00<?, ?it/s][A
convert squad examples to features: 100%|██████████| 480/480 [00:01<00:00, 273.45it/s]

add example index and unique id: 100%|██████████| 480/480 [00:00<00:00, 300397.78it/s]


Running Evaluation:   0%|          | 0/8 [00:00<?, ?it/s]

Running Epoch 1 of 3:   0%|          | 0/8 [00:00<?, ?it/s]


convert squad examples to features:   0%|          | 0/480 [00:00<?, ?it/s][A
convert squad examples to features: 100%|██████████| 480/480 [00:03<00:00, 133.25it/s]

add example index and unique id: 100%|██████████| 480/480 [00:00<00:00, 401769.29it/s]


Running Evaluation:   0%|          | 0/8 [00:00<?, ?it/s]

Running Epoch 2 of 3:   0%|          | 0/8 [00:00<?, ?it/s]


convert squad examples to features:   0%|          | 0/480 [00:00<?, ?it/s][A
convert squad examples to features: 100%|██████████| 480/480 [00:02<00:00, 178.47it/s]

add example index and unique id: 100%|██████████| 480/480 [00:00<00:00, 377936.16it/s]


Running Evaluation:   0%|          | 0/8 [00:00<?, ?it/s]

convert squad examples to features: 100%|██████████| 480/480 [00:04<00:00, 115.41it/s]
add example index and unique id: 100%|██████████| 480/480 [00:00<00:00, 248091.92it/s]


Running Evaluation:   0%|          | 0/8 [00:00<?, ?it/s]

VBox(children=(Label(value='0.001 MB of 0.016 MB uploaded\r'), FloatProgress(value=0.07608093978719478, max=1.…

0,1
correct,▁█▃
eval_loss,█▁▁
global_step,▁▅█
incorrect,█▁▂
similar,▁▁█
train_loss,█▄▁

0,1
correct,110.0
eval_loss,-3.53857
global_step,24.0
incorrect,46.0
similar,324.0
train_loss,2.23819


[34m[1mwandb[0m: Agent Starting Run: mrckn8qf with config:
[34m[1mwandb[0m: 	learning_rate: 0.000227089645292035
[34m[1mwandb[0m: 	train_batch_size: 64


Some weights of RobertaForQuestionAnswering were not initialized from the model checkpoint at roberta-base and are newly initialized: ['qa_outputs.weight', 'qa_outputs.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
convert squad examples to features: 100%|██████████| 949/949 [00:07<00:00, 119.81it/s]
add example index and unique id: 100%|██████████| 949/949 [00:00<00:00, 132397.37it/s]


Epoch:   0%|          | 0/3 [00:00<?, ?it/s]



VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

Running Epoch 0 of 3:   0%|          | 0/16 [00:00<?, ?it/s]


convert squad examples to features:   0%|          | 0/480 [00:00<?, ?it/s][A
convert squad examples to features: 100%|██████████| 480/480 [00:02<00:00, 184.44it/s]

add example index and unique id: 100%|██████████| 480/480 [00:00<00:00, 334152.02it/s]


Running Evaluation:   0%|          | 0/8 [00:00<?, ?it/s]

Running Epoch 1 of 3:   0%|          | 0/16 [00:00<?, ?it/s]


convert squad examples to features:   0%|          | 0/480 [00:00<?, ?it/s][A
convert squad examples to features: 100%|██████████| 480/480 [00:03<00:00, 145.89it/s]

add example index and unique id: 100%|██████████| 480/480 [00:00<00:00, 112769.05it/s]


Running Evaluation:   0%|          | 0/8 [00:00<?, ?it/s]

Running Epoch 2 of 3:   0%|          | 0/16 [00:00<?, ?it/s]


convert squad examples to features:   0%|          | 0/480 [00:00<?, ?it/s][A
convert squad examples to features: 100%|██████████| 480/480 [00:03<00:00, 131.51it/s]

add example index and unique id: 100%|██████████| 480/480 [00:00<00:00, 201346.73it/s]


Running Evaluation:   0%|          | 0/8 [00:00<?, ?it/s]

convert squad examples to features: 100%|██████████| 480/480 [00:01<00:00, 253.55it/s]
add example index and unique id: 100%|██████████| 480/480 [00:00<00:00, 283119.94it/s]


Running Evaluation:   0%|          | 0/8 [00:00<?, ?it/s]

VBox(children=(Label(value='0.006 MB of 0.015 MB uploaded\r'), FloatProgress(value=0.3870311506675143, max=1.0…

0,1
correct,█▁▃
eval_loss,█▄▁
global_step,▁▅█
incorrect,▁█▇
similar,▁█▆
train_loss,█▅▁

0,1
correct,135.0
eval_loss,-6.13184
global_step,48.0
incorrect,41.0
similar,304.0
train_loss,1.28553


[34m[1mwandb[0m: Agent Starting Run: gmppgmxk with config:
[34m[1mwandb[0m: 	learning_rate: 0.0003753443751271437
[34m[1mwandb[0m: 	train_batch_size: 64


Some weights of RobertaForQuestionAnswering were not initialized from the model checkpoint at roberta-base and are newly initialized: ['qa_outputs.weight', 'qa_outputs.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
convert squad examples to features: 100%|██████████| 949/949 [00:04<00:00, 228.01it/s]
add example index and unique id: 100%|██████████| 949/949 [00:00<00:00, 442413.53it/s]


Epoch:   0%|          | 0/3 [00:00<?, ?it/s]



VBox(children=(Label(value='0.011 MB of 0.011 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

VBox(children=(Label(value='Waiting for wandb.init()...\r'), FloatProgress(value=0.011112838588890858, max=1.0…

Running Epoch 0 of 3:   0%|          | 0/16 [00:00<?, ?it/s]


convert squad examples to features:   0%|          | 0/480 [00:00<?, ?it/s][A
convert squad examples to features: 100%|██████████| 480/480 [00:04<00:00, 117.39it/s]

add example index and unique id: 100%|██████████| 480/480 [00:00<00:00, 233774.49it/s]


Running Evaluation:   0%|          | 0/8 [00:00<?, ?it/s]

Running Epoch 1 of 3:   0%|          | 0/16 [00:00<?, ?it/s]


convert squad examples to features:   0%|          | 0/480 [00:00<?, ?it/s][A
convert squad examples to features: 100%|██████████| 480/480 [00:02<00:00, 176.13it/s]

add example index and unique id: 100%|██████████| 480/480 [00:00<00:00, 269441.37it/s]


Running Evaluation:   0%|          | 0/8 [00:00<?, ?it/s]

Running Epoch 2 of 3:   0%|          | 0/16 [00:00<?, ?it/s]


convert squad examples to features:   0%|          | 0/480 [00:00<?, ?it/s][A
convert squad examples to features: 100%|██████████| 480/480 [00:04<00:00, 119.16it/s]

add example index and unique id: 100%|██████████| 480/480 [00:00<00:00, 80743.80it/s]


Running Evaluation:   0%|          | 0/8 [00:00<?, ?it/s]

convert squad examples to features: 100%|██████████| 480/480 [00:01<00:00, 246.98it/s]
add example index and unique id: 100%|██████████| 480/480 [00:00<00:00, 369542.20it/s]


Running Evaluation:   0%|          | 0/8 [00:00<?, ?it/s]

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
correct,█▇▁
eval_loss,█▁▁
global_step,▁▅█
incorrect,▁▂█
similar,▁▂█
train_loss,█▇▁

0,1
correct,66.0
eval_loss,-5.53125
global_step,48.0
incorrect,70.0
similar,344.0
train_loss,1.75036


[34m[1mwandb[0m: Agent Starting Run: sv2k02fb with config:
[34m[1mwandb[0m: 	learning_rate: 0.0003923438278886478
[34m[1mwandb[0m: 	train_batch_size: 128


Some weights of RobertaForQuestionAnswering were not initialized from the model checkpoint at roberta-base and are newly initialized: ['qa_outputs.weight', 'qa_outputs.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
convert squad examples to features: 100%|██████████| 949/949 [00:04<00:00, 202.36it/s]
add example index and unique id: 100%|██████████| 949/949 [00:00<00:00, 212344.33it/s]


Epoch:   0%|          | 0/3 [00:00<?, ?it/s]



VBox(children=(Label(value='0.011 MB of 0.011 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

Running Epoch 0 of 3:   0%|          | 0/8 [00:00<?, ?it/s]


convert squad examples to features:   0%|          | 0/480 [00:00<?, ?it/s][A
convert squad examples to features: 100%|██████████| 480/480 [00:01<00:00, 253.57it/s]

add example index and unique id: 100%|██████████| 480/480 [00:00<00:00, 361188.72it/s]


Running Evaluation:   0%|          | 0/8 [00:00<?, ?it/s]

Running Epoch 1 of 3:   0%|          | 0/8 [00:00<?, ?it/s]


convert squad examples to features:   0%|          | 0/480 [00:00<?, ?it/s][A
convert squad examples to features: 100%|██████████| 480/480 [00:03<00:00, 145.76it/s]

add example index and unique id: 100%|██████████| 480/480 [00:00<00:00, 381155.99it/s]


Running Evaluation:   0%|          | 0/8 [00:00<?, ?it/s]

Running Epoch 2 of 3:   0%|          | 0/8 [00:00<?, ?it/s]


convert squad examples to features:   0%|          | 0/480 [00:00<?, ?it/s][A
convert squad examples to features: 100%|██████████| 480/480 [00:02<00:00, 169.18it/s]

add example index and unique id: 100%|██████████| 480/480 [00:00<00:00, 321762.17it/s]


Running Evaluation:   0%|          | 0/8 [00:00<?, ?it/s]

convert squad examples to features: 100%|██████████| 480/480 [00:03<00:00, 129.31it/s]
add example index and unique id: 100%|██████████| 480/480 [00:00<00:00, 258541.92it/s]


Running Evaluation:   0%|          | 0/8 [00:00<?, ?it/s]

VBox(children=(Label(value='0.001 MB of 0.015 MB uploaded\r'), FloatProgress(value=0.07652303120356613, max=1.…

0,1
correct,▁██
eval_loss,█▅▁
global_step,▁▅█
incorrect,█▁▁
similar,█▁▁
train_loss,█▄▁

0,1
correct,254.0
eval_loss,-5.01904
global_step,24.0
incorrect,0.0
similar,226.0
train_loss,2.69028


[34m[1mwandb[0m: Agent Starting Run: wg4qjb61 with config:
[34m[1mwandb[0m: 	learning_rate: 7.144377770390028e-05
[34m[1mwandb[0m: 	train_batch_size: 64


Some weights of RobertaForQuestionAnswering were not initialized from the model checkpoint at roberta-base and are newly initialized: ['qa_outputs.weight', 'qa_outputs.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
convert squad examples to features: 100%|██████████| 949/949 [00:07<00:00, 127.56it/s]
add example index and unique id: 100%|██████████| 949/949 [00:00<00:00, 274396.42it/s]


Epoch:   0%|          | 0/3 [00:00<?, ?it/s]



VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

Running Epoch 0 of 3:   0%|          | 0/16 [00:00<?, ?it/s]


convert squad examples to features:   0%|          | 0/480 [00:00<?, ?it/s][A
convert squad examples to features: 100%|██████████| 480/480 [00:03<00:00, 134.75it/s]

add example index and unique id: 100%|██████████| 480/480 [00:00<00:00, 90083.04it/s]


Running Evaluation:   0%|          | 0/8 [00:00<?, ?it/s]

Running Epoch 1 of 3:   0%|          | 0/16 [00:00<?, ?it/s]


convert squad examples to features:   0%|          | 0/480 [00:00<?, ?it/s][A
convert squad examples to features: 100%|██████████| 480/480 [00:02<00:00, 218.63it/s]

add example index and unique id: 100%|██████████| 480/480 [00:00<00:00, 282921.01it/s]


Running Evaluation:   0%|          | 0/8 [00:00<?, ?it/s]

Running Epoch 2 of 3:   0%|          | 0/16 [00:00<?, ?it/s]


convert squad examples to features:   0%|          | 0/480 [00:00<?, ?it/s][A
convert squad examples to features: 100%|██████████| 480/480 [00:03<00:00, 149.06it/s]

add example index and unique id: 100%|██████████| 480/480 [00:00<00:00, 326299.18it/s]


Running Evaluation:   0%|          | 0/8 [00:00<?, ?it/s]

convert squad examples to features: 100%|██████████| 480/480 [00:03<00:00, 130.24it/s]
add example index and unique id: 100%|██████████| 480/480 [00:00<00:00, 243383.21it/s]


Running Evaluation:   0%|          | 0/8 [00:00<?, ?it/s]

VBox(children=(Label(value='0.001 MB of 0.015 MB uploaded\r'), FloatProgress(value=0.07922569066085507, max=1.…

0,1
correct,▃▁█
eval_loss,█▃▁
global_step,▁▅█
incorrect,█▅▁
similar,▁█▁
train_loss,▇█▁

0,1
correct,90.0
eval_loss,-4.49121
global_step,48.0
incorrect,63.0
similar,327.0
train_loss,1.57345


[34m[1mwandb[0m: Agent Starting Run: 20o6ap7v with config:
[34m[1mwandb[0m: 	learning_rate: 7.08091631997978e-05
[34m[1mwandb[0m: 	train_batch_size: 64


Some weights of RobertaForQuestionAnswering were not initialized from the model checkpoint at roberta-base and are newly initialized: ['qa_outputs.weight', 'qa_outputs.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
convert squad examples to features: 100%|██████████| 949/949 [00:06<00:00, 154.20it/s]
add example index and unique id: 100%|██████████| 949/949 [00:00<00:00, 440017.08it/s]


Epoch:   0%|          | 0/3 [00:00<?, ?it/s]



VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

Running Epoch 0 of 3:   0%|          | 0/16 [00:00<?, ?it/s]


convert squad examples to features:   0%|          | 0/480 [00:00<?, ?it/s][A
convert squad examples to features: 100%|██████████| 480/480 [00:02<00:00, 187.58it/s]

add example index and unique id: 100%|██████████| 480/480 [00:00<00:00, 357977.58it/s]


Running Evaluation:   0%|          | 0/8 [00:00<?, ?it/s]

Running Epoch 1 of 3:   0%|          | 0/16 [00:00<?, ?it/s]


convert squad examples to features:   0%|          | 0/480 [00:00<?, ?it/s][A
convert squad examples to features: 100%|██████████| 480/480 [00:01<00:00, 274.77it/s]

add example index and unique id: 100%|██████████| 480/480 [00:00<00:00, 280711.92it/s]


Running Evaluation:   0%|          | 0/8 [00:00<?, ?it/s]

Running Epoch 2 of 3:   0%|          | 0/16 [00:00<?, ?it/s]


convert squad examples to features:   0%|          | 0/480 [00:00<?, ?it/s][A
convert squad examples to features: 100%|██████████| 480/480 [00:03<00:00, 136.97it/s]

add example index and unique id: 100%|██████████| 480/480 [00:00<00:00, 235112.22it/s]


Running Evaluation:   0%|          | 0/8 [00:00<?, ?it/s]

convert squad examples to features: 100%|██████████| 480/480 [00:01<00:00, 310.39it/s]
add example index and unique id: 100%|██████████| 480/480 [00:00<00:00, 418906.77it/s]


Running Evaluation:   0%|          | 0/8 [00:00<?, ?it/s]

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
correct,▁▅█
eval_loss,█▃▁
global_step,▁▅█
incorrect,█▃▁
similar,█▇▁
train_loss,▆█▁

0,1
correct,99.0
eval_loss,-4.27246
global_step,48.0
incorrect,55.0
similar,326.0
train_loss,1.74691


[34m[1mwandb[0m: Agent Starting Run: g95quee2 with config:
[34m[1mwandb[0m: 	learning_rate: 0.0001344119669217322
[34m[1mwandb[0m: 	train_batch_size: 64


Some weights of RobertaForQuestionAnswering were not initialized from the model checkpoint at roberta-base and are newly initialized: ['qa_outputs.weight', 'qa_outputs.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
convert squad examples to features: 100%|██████████| 949/949 [00:07<00:00, 133.98it/s]
add example index and unique id: 100%|██████████| 949/949 [00:00<00:00, 484350.75it/s]


Epoch:   0%|          | 0/3 [00:00<?, ?it/s]



VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

Running Epoch 0 of 3:   0%|          | 0/16 [00:00<?, ?it/s]


convert squad examples to features:   0%|          | 0/480 [00:00<?, ?it/s][A
convert squad examples to features: 100%|██████████| 480/480 [00:01<00:00, 244.75it/s]

add example index and unique id: 100%|██████████| 480/480 [00:00<00:00, 313886.17it/s]


Running Evaluation:   0%|          | 0/8 [00:00<?, ?it/s]

Running Epoch 1 of 3:   0%|          | 0/16 [00:00<?, ?it/s]


convert squad examples to features:   0%|          | 0/480 [00:00<?, ?it/s][A
convert squad examples to features: 100%|██████████| 480/480 [00:04<00:00, 109.69it/s]

add example index and unique id: 100%|██████████| 480/480 [00:00<00:00, 330965.96it/s]


Running Evaluation:   0%|          | 0/8 [00:00<?, ?it/s]

Running Epoch 2 of 3:   0%|          | 0/16 [00:00<?, ?it/s]


convert squad examples to features:   0%|          | 0/480 [00:00<?, ?it/s][A
convert squad examples to features: 100%|██████████| 480/480 [00:02<00:00, 224.74it/s]

add example index and unique id: 100%|██████████| 480/480 [00:00<00:00, 320379.68it/s]


Running Evaluation:   0%|          | 0/8 [00:00<?, ?it/s]

convert squad examples to features: 100%|██████████| 480/480 [00:03<00:00, 138.45it/s]
add example index and unique id: 100%|██████████| 480/480 [00:00<00:00, 296112.06it/s]


Running Evaluation:   0%|          | 0/8 [00:00<?, ?it/s]

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
correct,▂▁█
eval_loss,█▂▁
global_step,▁▅█
incorrect,█▄▁
similar,▁█▂
train_loss,▆█▁

0,1
correct,135.0
eval_loss,-5.59375
global_step,48.0
incorrect,38.0
similar,307.0
train_loss,1.14481


[34m[1mwandb[0m: Agent Starting Run: 456e0mtu with config:
[34m[1mwandb[0m: 	learning_rate: 0.00039598303043638825
[34m[1mwandb[0m: 	train_batch_size: 128


Some weights of RobertaForQuestionAnswering were not initialized from the model checkpoint at roberta-base and are newly initialized: ['qa_outputs.weight', 'qa_outputs.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
convert squad examples to features: 100%|██████████| 949/949 [00:08<00:00, 110.35it/s]
add example index and unique id: 100%|██████████| 949/949 [00:00<00:00, 460054.84it/s]


Epoch:   0%|          | 0/3 [00:00<?, ?it/s]



VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

Running Epoch 0 of 3:   0%|          | 0/8 [00:00<?, ?it/s]


convert squad examples to features:   0%|          | 0/480 [00:00<?, ?it/s][A
convert squad examples to features: 100%|██████████| 480/480 [00:03<00:00, 141.60it/s]

add example index and unique id: 100%|██████████| 480/480 [00:00<00:00, 336779.18it/s]


Running Evaluation:   0%|          | 0/8 [00:00<?, ?it/s]

Running Epoch 1 of 3:   0%|          | 0/8 [00:00<?, ?it/s]


convert squad examples to features:   0%|          | 0/480 [00:00<?, ?it/s][A
convert squad examples to features: 100%|██████████| 480/480 [00:01<00:00, 259.91it/s]

add example index and unique id: 100%|██████████| 480/480 [00:00<00:00, 285084.38it/s]


Running Evaluation:   0%|          | 0/8 [00:00<?, ?it/s]

Running Epoch 2 of 3:   0%|          | 0/8 [00:00<?, ?it/s]


convert squad examples to features:   0%|          | 0/480 [00:00<?, ?it/s][A
convert squad examples to features: 100%|██████████| 480/480 [00:03<00:00, 122.69it/s]

add example index and unique id: 100%|██████████| 480/480 [00:00<00:00, 387613.77it/s]


Running Evaluation:   0%|          | 0/8 [00:00<?, ?it/s]

convert squad examples to features: 100%|██████████| 480/480 [00:03<00:00, 146.54it/s]
add example index and unique id: 100%|██████████| 480/480 [00:00<00:00, 64666.62it/s]


Running Evaluation:   0%|          | 0/8 [00:00<?, ?it/s]

VBox(children=(Label(value='0.001 MB of 0.015 MB uploaded\r'), FloatProgress(value=0.07652303120356613, max=1.…

0,1
correct,▁██
eval_loss,█▃▁
global_step,▁▅█
incorrect,█▁▁
similar,▁██
train_loss,█▃▁

0,1
correct,255.0
eval_loss,-3.86768
global_step,24.0
incorrect,0.0
similar,225.0
train_loss,2.82832


[34m[1mwandb[0m: Agent Starting Run: doxefurg with config:
[34m[1mwandb[0m: 	learning_rate: 0.0003894786992258518
[34m[1mwandb[0m: 	train_batch_size: 128


Some weights of RobertaForQuestionAnswering were not initialized from the model checkpoint at roberta-base and are newly initialized: ['qa_outputs.weight', 'qa_outputs.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
convert squad examples to features: 100%|██████████| 949/949 [00:06<00:00, 141.54it/s]
add example index and unique id: 100%|██████████| 949/949 [00:00<00:00, 329885.17it/s]


Epoch:   0%|          | 0/3 [00:00<?, ?it/s]



VBox(children=(Label(value='0.001 MB of 0.011 MB uploaded\r'), FloatProgress(value=0.10798286263880388, max=1.…

Running Epoch 0 of 3:   0%|          | 0/8 [00:00<?, ?it/s]


convert squad examples to features:   0%|          | 0/480 [00:00<?, ?it/s][A
convert squad examples to features: 100%|██████████| 480/480 [00:02<00:00, 174.42it/s]

add example index and unique id: 100%|██████████| 480/480 [00:00<00:00, 333985.72it/s]


Running Evaluation:   0%|          | 0/8 [00:00<?, ?it/s]

Running Epoch 1 of 3:   0%|          | 0/8 [00:00<?, ?it/s]


convert squad examples to features:   0%|          | 0/480 [00:00<?, ?it/s][A
convert squad examples to features: 100%|██████████| 480/480 [00:02<00:00, 228.21it/s]

add example index and unique id: 100%|██████████| 480/480 [00:00<00:00, 193193.16it/s]


Running Evaluation:   0%|          | 0/8 [00:00<?, ?it/s]

Running Epoch 2 of 3:   0%|          | 0/8 [00:00<?, ?it/s]


convert squad examples to features:   0%|          | 0/480 [00:00<?, ?it/s][A
convert squad examples to features: 100%|██████████| 480/480 [00:03<00:00, 120.21it/s]

add example index and unique id: 100%|██████████| 480/480 [00:00<00:00, 275601.08it/s]


Running Evaluation:   0%|          | 0/8 [00:00<?, ?it/s]

convert squad examples to features: 100%|██████████| 480/480 [00:02<00:00, 216.70it/s]
add example index and unique id: 100%|██████████| 480/480 [00:00<00:00, 199155.79it/s]


Running Evaluation:   0%|          | 0/8 [00:00<?, ?it/s]

VBox(children=(Label(value='0.001 MB of 0.016 MB uploaded\r'), FloatProgress(value=0.07543484894720781, max=1.…

0,1
correct,▁██
eval_loss,█▂▁
global_step,▁▅█
incorrect,█▁▁
similar,█▁▁
train_loss,█▄▁

0,1
correct,252.0
eval_loss,-4.83154
global_step,24.0
incorrect,2.0
similar,226.0
train_loss,2.56834


[34m[1mwandb[0m: Agent Starting Run: 0utkdv6g with config:
[34m[1mwandb[0m: 	learning_rate: 4.1238539740178035e-05
[34m[1mwandb[0m: 	train_batch_size: 64


Some weights of RobertaForQuestionAnswering were not initialized from the model checkpoint at roberta-base and are newly initialized: ['qa_outputs.weight', 'qa_outputs.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
convert squad examples to features: 100%|██████████| 949/949 [00:06<00:00, 157.88it/s]
add example index and unique id: 100%|██████████| 949/949 [00:00<00:00, 379446.57it/s]


Epoch:   0%|          | 0/3 [00:00<?, ?it/s]



VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

Running Epoch 0 of 3:   0%|          | 0/16 [00:00<?, ?it/s]


convert squad examples to features:   0%|          | 0/480 [00:00<?, ?it/s][A
convert squad examples to features: 100%|██████████| 480/480 [00:03<00:00, 153.78it/s]

add example index and unique id: 100%|██████████| 480/480 [00:00<00:00, 318806.95it/s]


Running Evaluation:   0%|          | 0/8 [00:00<?, ?it/s]

Running Epoch 1 of 3:   0%|          | 0/16 [00:00<?, ?it/s]


convert squad examples to features:   0%|          | 0/480 [00:00<?, ?it/s][A
convert squad examples to features: 100%|██████████| 480/480 [00:03<00:00, 149.35it/s]

add example index and unique id: 100%|██████████| 480/480 [00:00<00:00, 327733.34it/s]


Running Evaluation:   0%|          | 0/8 [00:00<?, ?it/s]

Running Epoch 2 of 3:   0%|          | 0/16 [00:00<?, ?it/s]


convert squad examples to features:   0%|          | 0/480 [00:00<?, ?it/s][A
convert squad examples to features: 100%|██████████| 480/480 [00:04<00:00, 104.78it/s]

add example index and unique id: 100%|██████████| 480/480 [00:00<00:00, 132967.83it/s]


Running Evaluation:   0%|          | 0/8 [00:00<?, ?it/s]

convert squad examples to features: 100%|██████████| 480/480 [00:03<00:00, 159.69it/s]
add example index and unique id: 100%|██████████| 480/480 [00:00<00:00, 393600.38it/s]


Running Evaluation:   0%|          | 0/8 [00:00<?, ?it/s]

VBox(children=(Label(value='0.001 MB of 0.006 MB uploaded\r'), FloatProgress(value=0.20292234444262025, max=1.…

0,1
correct,▁▄█
eval_loss,█▃▁
global_step,▁▅█
incorrect,█▂▁
similar,▁█▇
train_loss,█▆▁

0,1
correct,70.0
eval_loss,-4.05151
global_step,48.0
incorrect,67.0
similar,343.0
train_loss,2.48701


[34m[1mwandb[0m: Agent Starting Run: c7kv7boe with config:
[34m[1mwandb[0m: 	learning_rate: 0.0001253164126548294
[34m[1mwandb[0m: 	train_batch_size: 128


Some weights of RobertaForQuestionAnswering were not initialized from the model checkpoint at roberta-base and are newly initialized: ['qa_outputs.weight', 'qa_outputs.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
convert squad examples to features: 100%|██████████| 949/949 [00:06<00:00, 139.73it/s]
add example index and unique id: 100%|██████████| 949/949 [00:00<00:00, 295808.15it/s]


Epoch:   0%|          | 0/3 [00:00<?, ?it/s]



VBox(children=(Label(value='0.001 MB of 0.011 MB uploaded\r'), FloatProgress(value=0.10781577110956037, max=1.…

Running Epoch 0 of 3:   0%|          | 0/8 [00:00<?, ?it/s]


convert squad examples to features:   0%|          | 0/480 [00:00<?, ?it/s][A
convert squad examples to features: 100%|██████████| 480/480 [00:02<00:00, 175.88it/s]

add example index and unique id: 100%|██████████| 480/480 [00:00<00:00, 324301.86it/s]


Running Evaluation:   0%|          | 0/8 [00:00<?, ?it/s]

Running Epoch 1 of 3:   0%|          | 0/8 [00:00<?, ?it/s]


convert squad examples to features:   0%|          | 0/480 [00:00<?, ?it/s][A
convert squad examples to features: 100%|██████████| 480/480 [00:01<00:00, 247.39it/s]

add example index and unique id: 100%|██████████| 480/480 [00:00<00:00, 319363.25it/s]


Running Evaluation:   0%|          | 0/8 [00:00<?, ?it/s]

Running Epoch 2 of 3:   0%|          | 0/8 [00:00<?, ?it/s]


convert squad examples to features:   0%|          | 0/480 [00:00<?, ?it/s][A
convert squad examples to features: 100%|██████████| 480/480 [00:02<00:00, 191.03it/s]

add example index and unique id: 100%|██████████| 480/480 [00:00<00:00, 429909.44it/s]


Running Evaluation:   0%|          | 0/8 [00:00<?, ?it/s]

convert squad examples to features: 100%|██████████| 480/480 [00:03<00:00, 140.60it/s]
add example index and unique id: 100%|██████████| 480/480 [00:00<00:00, 294466.27it/s]


Running Evaluation:   0%|          | 0/8 [00:00<?, ?it/s]

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
correct,█▁▁
eval_loss,█▄▁
global_step,▁▅█
incorrect,▁█▇
similar,▁██
train_loss,█▄▁

0,1
correct,82.0
eval_loss,-3.7229
global_step,24.0
incorrect,68.0
similar,330.0
train_loss,1.78755


[34m[1mwandb[0m: Agent Starting Run: utt1np0q with config:
[34m[1mwandb[0m: 	learning_rate: 0.0001410279914097703
[34m[1mwandb[0m: 	train_batch_size: 64


Some weights of RobertaForQuestionAnswering were not initialized from the model checkpoint at roberta-base and are newly initialized: ['qa_outputs.weight', 'qa_outputs.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
convert squad examples to features: 100%|██████████| 949/949 [00:04<00:00, 221.88it/s]
add example index and unique id: 100%|██████████| 949/949 [00:00<00:00, 301385.21it/s]


Epoch:   0%|          | 0/3 [00:00<?, ?it/s]



VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

Running Epoch 0 of 3:   0%|          | 0/16 [00:00<?, ?it/s]


convert squad examples to features:   0%|          | 0/480 [00:00<?, ?it/s][A
convert squad examples to features: 100%|██████████| 480/480 [00:03<00:00, 151.28it/s]

add example index and unique id: 100%|██████████| 480/480 [00:00<00:00, 316352.28it/s]


Running Evaluation:   0%|          | 0/8 [00:00<?, ?it/s]

Running Epoch 1 of 3:   0%|          | 0/16 [00:00<?, ?it/s]


convert squad examples to features:   0%|          | 0/480 [00:00<?, ?it/s][A
convert squad examples to features: 100%|██████████| 480/480 [00:03<00:00, 135.48it/s]

add example index and unique id: 100%|██████████| 480/480 [00:00<00:00, 332222.10it/s]


Running Evaluation:   0%|          | 0/8 [00:00<?, ?it/s]

Running Epoch 2 of 3:   0%|          | 0/16 [00:00<?, ?it/s]


convert squad examples to features:   0%|          | 0/480 [00:00<?, ?it/s][A
convert squad examples to features: 100%|██████████| 480/480 [00:03<00:00, 140.55it/s]

add example index and unique id: 100%|██████████| 480/480 [00:00<00:00, 314867.99it/s]


Running Evaluation:   0%|          | 0/8 [00:00<?, ?it/s]

convert squad examples to features: 100%|██████████| 480/480 [00:03<00:00, 129.17it/s]
add example index and unique id: 100%|██████████| 480/480 [00:00<00:00, 281970.02it/s]


Running Evaluation:   0%|          | 0/8 [00:00<?, ?it/s]

VBox(children=(Label(value='0.001 MB of 0.015 MB uploaded\r'), FloatProgress(value=0.07921045885670341, max=1.…

0,1
correct,█▁▅
eval_loss,█▄▁
global_step,▁▅█
incorrect,▁█▂
similar,▁█▅
train_loss,█▅▁

0,1
correct,143.0
eval_loss,-5.85205
global_step,48.0
incorrect,33.0
similar,304.0
train_loss,1.17083


[34m[1mwandb[0m: Agent Starting Run: yttk4yu9 with config:
[34m[1mwandb[0m: 	learning_rate: 0.00020450809306267568
[34m[1mwandb[0m: 	train_batch_size: 64


Some weights of RobertaForQuestionAnswering were not initialized from the model checkpoint at roberta-base and are newly initialized: ['qa_outputs.weight', 'qa_outputs.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
convert squad examples to features: 100%|██████████| 949/949 [00:06<00:00, 149.32it/s]
add example index and unique id: 100%|██████████| 949/949 [00:00<00:00, 328903.86it/s]


Epoch:   0%|          | 0/3 [00:00<?, ?it/s]



VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

Running Epoch 0 of 3:   0%|          | 0/16 [00:00<?, ?it/s]


convert squad examples to features:   0%|          | 0/480 [00:00<?, ?it/s][A
convert squad examples to features: 100%|██████████| 480/480 [00:05<00:00, 91.86it/s]

add example index and unique id: 100%|██████████| 480/480 [00:00<00:00, 129678.96it/s]


Running Evaluation:   0%|          | 0/8 [00:00<?, ?it/s]

Running Epoch 1 of 3:   0%|          | 0/16 [00:00<?, ?it/s]


convert squad examples to features:   0%|          | 0/480 [00:00<?, ?it/s][A
convert squad examples to features: 100%|██████████| 480/480 [00:03<00:00, 120.68it/s]

add example index and unique id: 100%|██████████| 480/480 [00:00<00:00, 325244.90it/s]


Running Evaluation:   0%|          | 0/8 [00:00<?, ?it/s]

Running Epoch 2 of 3:   0%|          | 0/16 [00:00<?, ?it/s]


convert squad examples to features:   0%|          | 0/480 [00:00<?, ?it/s][A
convert squad examples to features: 100%|██████████| 480/480 [00:02<00:00, 215.38it/s]

add example index and unique id: 100%|██████████| 480/480 [00:00<00:00, 335768.17it/s]


Running Evaluation:   0%|          | 0/8 [00:00<?, ?it/s]

convert squad examples to features: 100%|██████████| 480/480 [00:03<00:00, 158.21it/s]
add example index and unique id: 100%|██████████| 480/480 [00:00<00:00, 320736.96it/s]


Running Evaluation:   0%|          | 0/8 [00:00<?, ?it/s]

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
correct,▁█▇
eval_loss,█▂▁
global_step,▁▅█
incorrect,█▂▁
similar,▁▂█
train_loss,█▅▁

0,1
correct,122.0
eval_loss,-6.2334
global_step,48.0
incorrect,41.0
similar,317.0
train_loss,1.12387


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: e6rkfs8h with config:
[34m[1mwandb[0m: 	learning_rate: 0.0003115639253924678
[34m[1mwandb[0m: 	train_batch_size: 64


Some weights of RobertaForQuestionAnswering were not initialized from the model checkpoint at roberta-base and are newly initialized: ['qa_outputs.weight', 'qa_outputs.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
convert squad examples to features: 100%|██████████| 949/949 [00:06<00:00, 156.09it/s]
add example index and unique id: 100%|██████████| 949/949 [00:00<00:00, 186008.43it/s]


Epoch:   0%|          | 0/3 [00:00<?, ?it/s]



VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

Running Epoch 0 of 3:   0%|          | 0/16 [00:00<?, ?it/s]


convert squad examples to features:   0%|          | 0/480 [00:00<?, ?it/s][A
convert squad examples to features: 100%|██████████| 480/480 [00:01<00:00, 254.28it/s]

add example index and unique id: 100%|██████████| 480/480 [00:00<00:00, 118657.74it/s]


Running Evaluation:   0%|          | 0/8 [00:00<?, ?it/s]

Running Epoch 1 of 3:   0%|          | 0/16 [00:00<?, ?it/s]


convert squad examples to features:   0%|          | 0/480 [00:00<?, ?it/s][A
convert squad examples to features: 100%|██████████| 480/480 [00:02<00:00, 227.32it/s]

add example index and unique id: 100%|██████████| 480/480 [00:00<00:00, 399536.80it/s]


Running Evaluation:   0%|          | 0/8 [00:00<?, ?it/s]

Running Epoch 2 of 3:   0%|          | 0/16 [00:00<?, ?it/s]


convert squad examples to features:   0%|          | 0/480 [00:00<?, ?it/s][A
convert squad examples to features: 100%|██████████| 480/480 [00:02<00:00, 225.79it/s]

add example index and unique id: 100%|██████████| 480/480 [00:00<00:00, 215483.88it/s]


Running Evaluation:   0%|          | 0/8 [00:00<?, ?it/s]

convert squad examples to features: 100%|██████████| 480/480 [00:03<00:00, 120.17it/s]
add example index and unique id: 100%|██████████| 480/480 [00:00<00:00, 129980.37it/s]


Running Evaluation:   0%|          | 0/8 [00:00<?, ?it/s]

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
correct,█▁▁
eval_loss,█▅▁
global_step,▁▅█
incorrect,▁▇█
similar,▁█▇
train_loss,█▇▁

0,1
correct,147.0
eval_loss,-5.68506
global_step,48.0
incorrect,48.0
similar,285.0
train_loss,1.47381


[34m[1mwandb[0m: Agent Starting Run: 8hvechvz with config:
[34m[1mwandb[0m: 	learning_rate: 0.000264065755040823
[34m[1mwandb[0m: 	train_batch_size: 64


Some weights of RobertaForQuestionAnswering were not initialized from the model checkpoint at roberta-base and are newly initialized: ['qa_outputs.weight', 'qa_outputs.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
convert squad examples to features: 100%|██████████| 949/949 [00:06<00:00, 148.46it/s]
add example index and unique id: 100%|██████████| 949/949 [00:00<00:00, 274832.18it/s]


Epoch:   0%|          | 0/3 [00:00<?, ?it/s]



VBox(children=(Label(value='0.001 MB of 0.011 MB uploaded\r'), FloatProgress(value=0.1080702981551106, max=1.0…

Running Epoch 0 of 3:   0%|          | 0/16 [00:00<?, ?it/s]


convert squad examples to features:   0%|          | 0/480 [00:00<?, ?it/s][A
convert squad examples to features: 100%|██████████| 480/480 [00:03<00:00, 124.56it/s]

add example index and unique id: 100%|██████████| 480/480 [00:00<00:00, 331183.73it/s]


Running Evaluation:   0%|          | 0/8 [00:00<?, ?it/s]

Running Epoch 1 of 3:   0%|          | 0/16 [00:00<?, ?it/s]


convert squad examples to features:   0%|          | 0/480 [00:00<?, ?it/s][A
convert squad examples to features: 100%|██████████| 480/480 [00:03<00:00, 155.70it/s]

add example index and unique id: 100%|██████████| 480/480 [00:00<00:00, 309447.57it/s]


Running Evaluation:   0%|          | 0/8 [00:00<?, ?it/s]

Running Epoch 2 of 3:   0%|          | 0/16 [00:00<?, ?it/s]


convert squad examples to features:   0%|          | 0/480 [00:00<?, ?it/s][A
convert squad examples to features: 100%|██████████| 480/480 [00:02<00:00, 219.54it/s]

add example index and unique id: 100%|██████████| 480/480 [00:00<00:00, 382386.69it/s]


Running Evaluation:   0%|          | 0/8 [00:00<?, ?it/s]

convert squad examples to features: 100%|██████████| 480/480 [00:04<00:00, 108.83it/s]
add example index and unique id: 100%|██████████| 480/480 [00:00<00:00, 123794.25it/s]


Running Evaluation:   0%|          | 0/8 [00:00<?, ?it/s]

VBox(children=(Label(value='0.001 MB of 0.015 MB uploaded\r'), FloatProgress(value=0.0792745449884645, max=1.0…

0,1
correct,█▄▁
eval_loss,█▃▁
global_step,▁▅█
incorrect,█▇▁
similar,▁▅█
train_loss,█▂▁

0,1
correct,142.0
eval_loss,-6.07568
global_step,48.0
incorrect,30.0
similar,308.0
train_loss,1.6945


[34m[1mwandb[0m: Agent Starting Run: ghdariwd with config:
[34m[1mwandb[0m: 	learning_rate: 0.0003216643256849616
[34m[1mwandb[0m: 	train_batch_size: 64


Some weights of RobertaForQuestionAnswering were not initialized from the model checkpoint at roberta-base and are newly initialized: ['qa_outputs.weight', 'qa_outputs.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
convert squad examples to features: 100%|██████████| 949/949 [00:06<00:00, 140.68it/s]
add example index and unique id: 100%|██████████| 949/949 [00:00<00:00, 380207.71it/s]


Epoch:   0%|          | 0/3 [00:00<?, ?it/s]



VBox(children=(Label(value='0.011 MB of 0.011 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

Running Epoch 0 of 3:   0%|          | 0/16 [00:00<?, ?it/s]


convert squad examples to features:   0%|          | 0/480 [00:00<?, ?it/s][A
convert squad examples to features: 100%|██████████| 480/480 [00:03<00:00, 147.17it/s]

add example index and unique id: 100%|██████████| 480/480 [00:00<00:00, 347534.25it/s]


Running Evaluation:   0%|          | 0/8 [00:00<?, ?it/s]

Running Epoch 1 of 3:   0%|          | 0/16 [00:00<?, ?it/s]


convert squad examples to features:   0%|          | 0/480 [00:00<?, ?it/s][A
convert squad examples to features: 100%|██████████| 480/480 [00:03<00:00, 121.03it/s]

add example index and unique id: 100%|██████████| 480/480 [00:00<00:00, 85333.19it/s]


Running Evaluation:   0%|          | 0/8 [00:00<?, ?it/s]

Running Epoch 2 of 3:   0%|          | 0/16 [00:00<?, ?it/s]


convert squad examples to features:   0%|          | 0/480 [00:00<?, ?it/s][A
convert squad examples to features: 100%|██████████| 480/480 [00:02<00:00, 233.14it/s]

add example index and unique id: 100%|██████████| 480/480 [00:00<00:00, 142270.22it/s]


Running Evaluation:   0%|          | 0/8 [00:00<?, ?it/s]

convert squad examples to features: 100%|██████████| 480/480 [00:02<00:00, 234.32it/s]
add example index and unique id: 100%|██████████| 480/480 [00:00<00:00, 352092.68it/s]


Running Evaluation:   0%|          | 0/8 [00:00<?, ?it/s]

VBox(children=(Label(value='0.014 MB of 0.015 MB uploaded\r'), FloatProgress(value=0.9418560606060606, max=1.0…

0,1
correct,▂▁█
eval_loss,▁▄█
global_step,▁▅█
incorrect,▇▁█
similar,▃█▁
train_loss,█▁▁

0,1
correct,6.0
eval_loss,-0.0593
global_step,48.0
incorrect,193.0
similar,281.0
train_loss,4.8678


[34m[1mwandb[0m: Agent Starting Run: vbex72qy with config:
[34m[1mwandb[0m: 	learning_rate: 0.0002201504224585913
[34m[1mwandb[0m: 	train_batch_size: 128


Some weights of RobertaForQuestionAnswering were not initialized from the model checkpoint at roberta-base and are newly initialized: ['qa_outputs.weight', 'qa_outputs.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
convert squad examples to features: 100%|██████████| 949/949 [00:06<00:00, 151.13it/s]
add example index and unique id: 100%|██████████| 949/949 [00:00<00:00, 328171.70it/s]


Epoch:   0%|          | 0/3 [00:00<?, ?it/s]



VBox(children=(Label(value='0.001 MB of 0.011 MB uploaded\r'), FloatProgress(value=0.10780636720453554, max=1.…

Running Epoch 0 of 3:   0%|          | 0/8 [00:00<?, ?it/s]


convert squad examples to features:   0%|          | 0/480 [00:00<?, ?it/s][A
convert squad examples to features: 100%|██████████| 480/480 [00:03<00:00, 156.58it/s]

add example index and unique id: 100%|██████████| 480/480 [00:00<00:00, 237890.34it/s]


Running Evaluation:   0%|          | 0/8 [00:00<?, ?it/s]

Running Epoch 1 of 3:   0%|          | 0/8 [00:00<?, ?it/s]


convert squad examples to features:   0%|          | 0/480 [00:00<?, ?it/s][A
convert squad examples to features: 100%|██████████| 480/480 [00:03<00:00, 127.42it/s]

add example index and unique id: 100%|██████████| 480/480 [00:00<00:00, 119808.73it/s]


Running Evaluation:   0%|          | 0/8 [00:00<?, ?it/s]

Running Epoch 2 of 3:   0%|          | 0/8 [00:00<?, ?it/s]


convert squad examples to features:   0%|          | 0/480 [00:00<?, ?it/s][A
convert squad examples to features: 100%|██████████| 480/480 [00:04<00:00, 116.12it/s]

add example index and unique id: 100%|██████████| 480/480 [00:00<00:00, 320839.19it/s]


Running Evaluation:   0%|          | 0/8 [00:00<?, ?it/s]

convert squad examples to features: 100%|██████████| 480/480 [00:01<00:00, 246.29it/s]
add example index and unique id: 100%|██████████| 480/480 [00:00<00:00, 263344.14it/s]


Running Evaluation:   0%|          | 0/8 [00:00<?, ?it/s]

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
correct,█▁▄
eval_loss,█▂▁
global_step,▁▅█
incorrect,▁█▅
similar,▁█▅
train_loss,█▃▁

0,1
correct,133.0
eval_loss,-4.67383
global_step,24.0
incorrect,51.0
similar,296.0
train_loss,1.50043


[34m[1mwandb[0m: Agent Starting Run: 0gku6guh with config:
[34m[1mwandb[0m: 	learning_rate: 6.618208041366051e-05
[34m[1mwandb[0m: 	train_batch_size: 128


Some weights of RobertaForQuestionAnswering were not initialized from the model checkpoint at roberta-base and are newly initialized: ['qa_outputs.weight', 'qa_outputs.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
convert squad examples to features: 100%|██████████| 949/949 [00:03<00:00, 260.51it/s]
add example index and unique id: 100%|██████████| 949/949 [00:00<00:00, 505639.54it/s]


Epoch:   0%|          | 0/3 [00:00<?, ?it/s]



VBox(children=(Label(value='0.011 MB of 0.011 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

Running Epoch 0 of 3:   0%|          | 0/8 [00:00<?, ?it/s]

