In [1]:
import os
import sys
from pathlib import Path

if "workding_dir" not in locals():
    workding_dir = str(Path.cwd().parent)
os.chdir(workding_dir)
sys.path.append(workding_dir)
print("working dir:", workding_dir)

working dir: c:\Users\admin\code\CrediNews


In [2]:
from dotenv import find_dotenv, load_dotenv

found_dotenv = find_dotenv(".env")

if len(found_dotenv) == 0:
    found_dotenv = find_dotenv(".env.example")
print(f"loading env vars from: {found_dotenv}")
load_dotenv(found_dotenv, override=True)

loading env vars from: c:\Users\admin\code\CrediNews\.env


True

In [3]:
# !pip install wandb
import os
os.getenv("WANDB_API_KEY", "empty")[-5:]

'f541b'

In [4]:
import os
from transformers import DataCollatorWithPadding, AutoTokenizer, AutoModelForSequenceClassification

# Change these as needed.
model_name = "FacebookAI/roberta-base"

# Triggers tokenizer download to cache
tokenizer = AutoTokenizer.from_pretrained(model_name)
print("Downloading and caching pre-trained model")

data_collator = DataCollatorWithPadding(tokenizer=tokenizer)

# Triggers model download to cache
AutoModelForSequenceClassification.from_pretrained(model_name)


Downloading and caching pre-trained model


Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at FacebookAI/roberta-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


RobertaForSequenceClassification(
  (roberta): RobertaModel(
    (embeddings): RobertaEmbeddings(
      (word_embeddings): Embedding(50265, 768, padding_idx=1)
      (position_embeddings): Embedding(514, 768, padding_idx=1)
      (token_type_embeddings): Embedding(1, 768)
      (LayerNorm): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
      (dropout): Dropout(p=0.1, inplace=False)
    )
    (encoder): RobertaEncoder(
      (layer): ModuleList(
        (0-11): 12 x RobertaLayer(
          (attention): RobertaAttention(
            (self): RobertaSdpaSelfAttention(
              (query): Linear(in_features=768, out_features=768, bias=True)
              (key): Linear(in_features=768, out_features=768, bias=True)
              (value): Linear(in_features=768, out_features=768, bias=True)
              (dropout): Dropout(p=0.1, inplace=False)
            )
            (output): RobertaSelfOutput(
              (dense): Linear(in_features=768, out_features=768, bias=True)
         

## Training

With everything now downloaded and cached, we can now set up our training function. Our training function defines the training execution for a single hyperparameter configuration. For now we pull these hyperparameters from a config argument, but we’ll see later how this is passed in.

First we get our datasets- we only use the first half of the dev dataset for validation, and leave the rest of testing:

In [5]:
from datasets import load_dataset, concatenate_datasets, Dataset

datasets = load_dataset(
    "csv",
    data_files={
        "train": [
            "dataset/train_data_1.csv",
            "dataset/train_data_2.csv",
            "dataset/train_data_3.csv",
            "dataset/train_data_4.csv",
        ],
        "test": "dataset/test_data.csv",
        "rewritten_train": [
            "dataset/rewritten_train_data_1.csv",
            "dataset/rewritten_train_data_2.csv",
            "dataset/rewritten_train_data_3.csv",
            "dataset/rewritten_train_data_4.csv",
        ],
        "rewritten_test": "dataset/rewritten_test_data.csv",
    },
)

# Tokenize the dataset
def tokenize_function(example):
    return tokenizer(example["processed_full_content"], padding="max_length", truncation=True)

tokenized_datasets = datasets.map(tokenize_function, batched=True)

def get_datasets(config="original"):
    if config == "rewritten":
        train_dataset = tokenized_datasets["rewritten_train"]
        eval_dataset = tokenized_datasets["rewritten_test"]
    elif config == "original":
        train_dataset = tokenized_datasets["train"]
        eval_dataset = tokenized_datasets["test"]
    else:
        train_dataset = concatenate_datasets(
            [tokenized_datasets["train"], tokenized_datasets["rewritten_train"]]
        )
        eval_dataset = concatenate_datasets(
            [tokenized_datasets["test"], tokenized_datasets["rewritten_test"]]
        )

    return train_dataset, eval_dataset

Map:   0%|          | 0/54441 [00:00<?, ? examples/s]

In [6]:
get_datasets(config="original"), get_datasets(config="rewritten"), get_datasets(config="all")

((Dataset({
      features: ['label', 'full_content', 'processed_full_content', 'input_ids', 'attention_mask'],
      num_rows: 54441
  }),
  Dataset({
      features: ['label', 'full_content', 'processed_full_content', 'input_ids', 'attention_mask'],
      num_rows: 6050
  })),
 (Dataset({
      features: ['label', 'full_content', 'processed_full_content', 'input_ids', 'attention_mask'],
      num_rows: 54441
  }),
  Dataset({
      features: ['label', 'full_content', 'processed_full_content', 'input_ids', 'attention_mask'],
      num_rows: 6050
  })),
 (Dataset({
      features: ['label', 'full_content', 'processed_full_content', 'input_ids', 'attention_mask'],
      num_rows: 108882
  }),
  Dataset({
      features: ['label', 'full_content', 'processed_full_content', 'input_ids', 'attention_mask'],
      num_rows: 12100
  })))

In [7]:
def wandb_hp_space(trial):
    return {
        "method": "random",
        "metric": {"name": "accuracy", "goal": "maximize"},
        "parameters": {
            "learning_rate": {"distribution": "uniform", "min": 1e-6, "max": 1e-4},
            "per_device_train_batch_size": {"values": [16, 32]},
        },
    }

In [8]:
from transformers import AutoModelForSequenceClassification
import numpy as np
import evaluate
from transformers import EvalPrediction

# Load the accuracy metric from the evaluate library
accuracy_metric = evaluate.load("accuracy")

def compute_metrics(eval_pred: EvalPrediction) -> dict:
    logits, labels = eval_pred
    predictions = np.argmax(logits, axis=-1)
    accuracy = accuracy_metric.compute(predictions=predictions, references=labels)
    return accuracy

def model_init():
    return AutoModelForSequenceClassification.from_pretrained(model_name, num_labels=2)


In [9]:
from transformers import Trainer, TrainingArguments

def do_grid_search(config):
    train_dataset, eval_dataset = get_datasets(config)
    training_args = TrainingArguments(
        output_dir="./results",
        evaluation_strategy="epoch",
        save_strategy="epoch",
        per_device_train_batch_size=8,
        per_device_eval_batch_size=8,
        num_train_epochs=4,
        weight_decay=0.01,
    )

    trainer = Trainer(
        model_init=model_init,
        args=training_args,
        train_dataset=train_dataset,
        eval_dataset=eval_dataset,
        compute_metrics=compute_metrics,
        tokenizer=tokenizer,
        data_collator=data_collator,
    )

    return trainer.hyperparameter_search(
        direction="maximize",
        backend="wandb", # or "ray", "optuna", "sigopt"
        hp_space=wandb_hp_space, #ray_hp_space, 
        n_trials=5,
    )

In [10]:
%%time

best_trial = do_grid_search("original")
best_trial

  trainer = Trainer(
Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at FacebookAI/roberta-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
[34m[1mwandb[0m: Using wandb-core as the SDK backend.  Please refer to https://wandb.me/wandb-core for more information.


Create sweep with ID: y4uiki80
Sweep URL: https://wandb.ai/inflaton-ai/uncategorized/sweeps/y4uiki80


[34m[1mwandb[0m: Agent Starting Run: nzq3a7jq with config:
[34m[1mwandb[0m: 	learning_rate: 1.0171689812737018e-05
[34m[1mwandb[0m: 	per_device_train_batch_size: 16
[34m[1mwandb[0m: Currently logged in as: [33minflaton-sg[0m ([33minflaton-ai[0m) to [32mhttps://api.wandb.ai[0m. Use [1m`wandb login --relogin`[0m to force relogin


Trying to set _wandb in the hyperparameter search but there is no corresponding field in `TrainingArguments`.
Trying to set assignments in the hyperparameter search but there is no corresponding field in `TrainingArguments`.
Trying to set metric in the hyperparameter search but there is no corresponding field in `TrainingArguments`.
Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at FacebookAI/roberta-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.




Epoch,Training Loss,Validation Loss,Accuracy
1,0.0565,0.045244,0.98876
2,0.0326,0.049407,0.991405
3,0.0175,0.04608,0.992066
4,0.0081,0.038603,0.993554


0,1
eval/accuracy,▁▅▆█
eval/loss,▅█▆▁
eval/runtime,█▆▄▁
eval/samples_per_second,▁▃▅█
eval/steps_per_second,▁▃▅█
train/epoch,▁▁▂▂▂▂▃▃▃▃▃▄▄▄▄▄▅▅▅▆▆▆▆▆▇▇▇▇████
train/global_step,▁▁▂▂▂▂▃▃▃▃▃▄▄▄▄▄▅▅▅▆▆▆▆▆▇▇▇▇████
train/grad_norm,▃▁▁▁▄▁▁▃▁▁▁▂▁▁▁▁█▁▁▁▁▁▁▁▁▁▁
train/learning_rate,██▇▇▇▇▆▆▆▆▅▅▅▅▄▄▄▃▃▃▃▂▂▂▂▁▁
train/loss,█▄▃▃▃▃▂▂▂▂▂▂▂▂▂▂▁▂▂▁▁▁▁▁▁▁▁

0,1
eval/accuracy,0.99355
eval/loss,0.0386
eval/runtime,33.4832
eval/samples_per_second,180.688
eval/steps_per_second,22.608
total_flos,5.729611585941504e+16
train/epoch,4.0
train/global_step,13612.0
train/grad_norm,0.00262
train/learning_rate,0.0


[34m[1mwandb[0m: Agent Starting Run: f4kk5d3s with config:
[34m[1mwandb[0m: 	learning_rate: 7.57253058741028e-06
[34m[1mwandb[0m: 	per_device_train_batch_size: 32


Trying to set _wandb in the hyperparameter search but there is no corresponding field in `TrainingArguments`.
Trying to set assignments in the hyperparameter search but there is no corresponding field in `TrainingArguments`.
Trying to set metric in the hyperparameter search but there is no corresponding field in `TrainingArguments`.
Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at FacebookAI/roberta-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.




Epoch,Training Loss,Validation Loss,Accuracy
1,0.0596,0.038689,0.988264
2,0.038,0.040616,0.99124
3,0.0235,0.044294,0.99124
4,0.0129,0.038807,0.992893


0,1
eval/accuracy,▁▆▆█
eval/loss,▁▃█▁
eval/runtime,▁▁▁█
eval/samples_per_second,███▁
eval/steps_per_second,███▁
train/epoch,▁▂▂▂▃▃▄▄▄▅▅▆▆▇▇███
train/global_step,▁▂▂▂▃▃▄▄▄▅▅▆▆▇▇███
train/grad_norm,▂▄▂▂▁▁█▁▁▁▂▁▁
train/learning_rate,█▇▇▆▆▅▄▄▃▃▂▂▁
train/loss,█▃▃▂▂▂▂▂▁▁▁▁▁

0,1
eval/accuracy,0.99289
eval/loss,0.03881
eval/runtime,81.8643
eval/samples_per_second,73.903
eval/steps_per_second,9.247
total_flos,5.729611585941504e+16
train/epoch,4.0
train/global_step,6808.0
train/grad_norm,0.00626
train/learning_rate,0.0


[34m[1mwandb[0m: Agent Starting Run: x5xvwstt with config:
[34m[1mwandb[0m: 	learning_rate: 1.9554285493551877e-05
[34m[1mwandb[0m: 	per_device_train_batch_size: 16


Trying to set _wandb in the hyperparameter search but there is no corresponding field in `TrainingArguments`.
Trying to set assignments in the hyperparameter search but there is no corresponding field in `TrainingArguments`.
Trying to set metric in the hyperparameter search but there is no corresponding field in `TrainingArguments`.
Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at FacebookAI/roberta-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.




Epoch,Training Loss,Validation Loss,Accuracy
1,0.0565,0.03302,0.991901
2,0.0306,0.036089,0.992562
3,0.0127,0.044588,0.993058
4,0.0026,0.039041,0.993719


0,1
eval/accuracy,▁▄▅█
eval/loss,▁▃█▅
eval/runtime,▁▂█▄
eval/samples_per_second,█▇▁▅
eval/steps_per_second,█▇▁▅
train/epoch,▁▁▂▂▂▂▃▃▃▃▃▄▄▄▄▄▅▅▅▆▆▆▆▆▇▇▇▇████
train/global_step,▁▁▂▂▂▂▃▃▃▃▃▄▄▄▄▄▅▅▅▆▆▆▆▆▇▇▇▇████
train/grad_norm,▁▁▁▁▁▁▁▁▁▁▁█▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
train/learning_rate,██▇▇▇▇▆▆▆▆▅▅▅▄▄▄▄▃▃▃▃▂▂▂▂▁▁
train/loss,█▄▃▃▃▃▂▂▂▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁

0,1
eval/accuracy,0.99372
eval/loss,0.03904
eval/runtime,81.4399
eval/samples_per_second,74.288
eval/steps_per_second,9.295
total_flos,5.729611585941504e+16
train/epoch,4.0
train/global_step,13612.0
train/grad_norm,0.00375
train/learning_rate,0.0


[34m[1mwandb[0m: Agent Starting Run: x5tw0tc4 with config:
[34m[1mwandb[0m: 	learning_rate: 2.854559097171493e-05
[34m[1mwandb[0m: 	per_device_train_batch_size: 16


Trying to set _wandb in the hyperparameter search but there is no corresponding field in `TrainingArguments`.
Trying to set assignments in the hyperparameter search but there is no corresponding field in `TrainingArguments`.
Trying to set metric in the hyperparameter search but there is no corresponding field in `TrainingArguments`.
Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at FacebookAI/roberta-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.




Epoch,Training Loss,Validation Loss,Accuracy
1,0.0613,0.044948,0.990413
2,0.0417,0.030354,0.993554
3,0.0138,0.027831,0.993223
4,0.0086,0.035968,0.993719


0,1
eval/accuracy,▁█▇█
eval/loss,█▂▁▄
eval/runtime,▇█▂▁
eval/samples_per_second,▂▁▇█
eval/steps_per_second,▂▁▇█
train/epoch,▁▁▂▂▂▂▃▃▃▃▃▄▄▄▄▄▅▅▅▆▆▆▆▆▇▇▇▇████
train/global_step,▁▁▂▂▂▂▃▃▃▃▃▄▄▄▄▄▅▅▅▆▆▆▆▆▇▇▇▇████
train/grad_norm,▁▄▁▁▁█▁▁▁▁▁▁▁▁▁▁▂▁▃▁▁▁▁▁▁▁▁
train/learning_rate,██▇▇▇▇▆▆▆▆▅▅▅▄▄▄▄▃▃▃▃▂▂▂▂▁▁
train/loss,█▄▃▃▃▃▂▂▂▂▂▂▂▂▂▁▁▂▂▁▁▁▁▁▁▁▁

0,1
eval/accuracy,0.99372
eval/loss,0.03597
eval/runtime,78.6529
eval/samples_per_second,76.92
eval/steps_per_second,9.625
total_flos,5.729611585941504e+16
train/epoch,4.0
train/global_step,13612.0
train/grad_norm,0.00515
train/learning_rate,0.0


[34m[1mwandb[0m: Agent Starting Run: l4087hok with config:
[34m[1mwandb[0m: 	learning_rate: 8.900405714208889e-05
[34m[1mwandb[0m: 	per_device_train_batch_size: 16


Trying to set _wandb in the hyperparameter search but there is no corresponding field in `TrainingArguments`.
Trying to set assignments in the hyperparameter search but there is no corresponding field in `TrainingArguments`.
Trying to set metric in the hyperparameter search but there is no corresponding field in `TrainingArguments`.
Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at FacebookAI/roberta-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.




Epoch,Training Loss,Validation Loss,Accuracy
1,0.6938,0.693522,0.437521
2,0.6868,0.68691,0.562479
3,0.6844,0.685599,0.562479
4,0.6847,0.685331,0.562479


0,1
eval/accuracy,▁███
eval/loss,█▂▁▁
eval/runtime,█▇▁▁
eval/samples_per_second,▁▂██
eval/steps_per_second,▁▂██
train/epoch,▁▁▂▂▂▂▃▃▃▃▃▄▄▄▄▄▅▅▅▆▆▆▆▆▇▇▇▇████
train/global_step,▁▁▂▂▂▂▃▃▃▃▃▄▄▄▄▄▅▅▅▆▆▆▆▆▇▇▇▇████
train/grad_norm,▂▃▃▃▂▄▂▃█▃▂▂▅▃▃▃▂▁▃▂▁▂▃▂▁▃▃
train/learning_rate,██▇▇▇▇▆▆▆▆▅▅▅▅▄▄▄▃▃▃▃▂▂▂▂▁▁
train/loss,▁███████▇█▇▇▇▇▇▇█▇▇▇▇▇▇▇▇▇▇

0,1
eval/accuracy,0.56248
eval/loss,0.68533
eval/runtime,77.0014
eval/samples_per_second,78.57
eval/steps_per_second,9.831
total_flos,5.729611585941504e+16
train/epoch,4.0
train/global_step,13612.0
train/grad_norm,1.39089
train/learning_rate,0.0


CPU times: total: 8h 30min 42s
Wall time: 9h 27min 1s


BestRun(run_id='x5xvwstt', objective=0.9937190082644628, hyperparameters={'learning_rate': 1.9554285493551877e-05, 'per_device_train_batch_size': 16, 'assignments': {}, 'metric': 'eval/loss'}, run_summary=None)

In [11]:
%%time

best_trial_combined = do_grid_search("combined")
best_trial_combined

  trainer = Trainer(
Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at FacebookAI/roberta-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Create sweep with ID: 86aapprs
Sweep URL: https://wandb.ai/inflaton-ai/uncategorized/sweeps/86aapprs


[34m[1mwandb[0m: Agent Starting Run: nr9jb96s with config:
[34m[1mwandb[0m: 	learning_rate: 7.690367865493667e-05
[34m[1mwandb[0m: 	per_device_train_batch_size: 16


Trying to set _wandb in the hyperparameter search but there is no corresponding field in `TrainingArguments`.
Trying to set assignments in the hyperparameter search but there is no corresponding field in `TrainingArguments`.
Trying to set metric in the hyperparameter search but there is no corresponding field in `TrainingArguments`.
Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at FacebookAI/roberta-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.




Epoch,Training Loss,Validation Loss,Accuracy
1,0.6877,0.68538,0.562479
2,0.6842,0.685331,0.562479
3,0.686,0.686417,0.562479
4,0.6878,0.685369,0.562479


0,1
eval/accuracy,▁▁▁▁
eval/loss,▁▁█▁
eval/runtime,▁███
eval/samples_per_second,█▁▁▁
eval/steps_per_second,█▁▁▁
train/epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇████
train/global_step,▁▁▁▂▂▂▃▃▃▃▃▃▄▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇▇████
train/grad_norm,▅▂▇▃▅▂▃▂▃▁▂▆▄▄▃▆▁▆▁▂▂▆▃▂▄▆▅▅█▁▄▇▇▂▃▂▂█▃▅
train/learning_rate,████▇▇▇▇▇▇▆▆▆▆▆▅▅▅▅▅▅▄▄▄▄▄▄▃▃▃▃▂▂▂▂▂▂▂▁▁
train/loss,▇▇█▆▅▃▅▆▆▅▅▅▃▅▄▅▃▄▅▅▆▃▃▅▁▂▄▄▅▂▅▃▄▃▃▃▄▃▃▅

0,1
eval/accuracy,0.56248
eval/loss,0.68537
eval/runtime,158.5734
eval/samples_per_second,76.305
eval/steps_per_second,9.541
total_flos,1.1459223171883008e+17
train/epoch,4.0
train/global_step,27224.0
train/grad_norm,1.04727
train/learning_rate,0.0


[34m[1mwandb[0m: Agent Starting Run: dak80c7c with config:
[34m[1mwandb[0m: 	learning_rate: 5.0310834517178015e-05
[34m[1mwandb[0m: 	per_device_train_batch_size: 16


Trying to set _wandb in the hyperparameter search but there is no corresponding field in `TrainingArguments`.
Trying to set assignments in the hyperparameter search but there is no corresponding field in `TrainingArguments`.
Trying to set metric in the hyperparameter search but there is no corresponding field in `TrainingArguments`.
Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at FacebookAI/roberta-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.




Epoch,Training Loss,Validation Loss,Accuracy
1,0.6896,0.685353,0.562479
2,0.6854,0.685372,0.562479
3,0.6871,0.687132,0.562479
4,0.6884,0.685415,0.562479


0,1
eval/accuracy,▁▁▁▁
eval/loss,▁▁█▁
eval/runtime,█▂▁▁
eval/samples_per_second,▁▇██
eval/steps_per_second,▁▇██
train/epoch,▁▁▁▂▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███
train/global_step,▁▁▁▂▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇████
train/grad_norm,▇▆▂▃▂▃▅▂▂▂▁▄▂▄▃▃▃▄▂▁▂▂█▂▃▄▂▂▂▄█▂▃▇▂▂▂▆▂▂
train/learning_rate,████▇▇▇▇▇▇▆▆▆▆▆▆▅▅▅▅▅▄▄▄▄▃▃▃▃▃▂▂▂▂▂▂▂▁▁▁
train/loss,▂▁▆███████████▇█████████████████████████

0,1
eval/accuracy,0.56248
eval/loss,0.68541
eval/runtime,155.0431
eval/samples_per_second,78.043
eval/steps_per_second,9.759
total_flos,1.1459223171883008e+17
train/epoch,4.0
train/global_step,27224.0
train/grad_norm,1.82586
train/learning_rate,0.0


[34m[1mwandb[0m: Agent Starting Run: knr5azte with config:
[34m[1mwandb[0m: 	learning_rate: 4.443033730929008e-05
[34m[1mwandb[0m: 	per_device_train_batch_size: 16


Trying to set _wandb in the hyperparameter search but there is no corresponding field in `TrainingArguments`.
Trying to set assignments in the hyperparameter search but there is no corresponding field in `TrainingArguments`.
Trying to set metric in the hyperparameter search but there is no corresponding field in `TrainingArguments`.
Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at FacebookAI/roberta-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.




Epoch,Training Loss,Validation Loss,Accuracy
1,0.6418,0.633579,0.63405
2,0.6843,0.685325,0.562479
3,0.6862,0.686418,0.562479
4,0.6885,0.685359,0.562479


0,1
eval/accuracy,█▁▁▁
eval/loss,▁███
eval/runtime,▄▃█▁
eval/samples_per_second,▅▆▁█
eval/steps_per_second,▅▆▁█
train/epoch,▁▁▁▂▂▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇███
train/global_step,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▄▄▄▄▄▄▅▅▅▅▅▆▆▆▆▆▆▆▇▇▇▇▇███
train/grad_norm,█▁▂▂▃▁▂▁▁▄▁▃▂▂▂▂▃▁▄▁▂▃▁▂▁▂▂▃▁▁▃▃▁▂▃▁▃▁▁▃
train/learning_rate,████▇▇▇▇▇▇▆▆▆▆▆▅▅▅▅▄▄▄▄▄▄▃▃▃▃▃▂▂▂▂▂▂▂▁▁▁
train/loss,▁▁▂▂▁▂▅▇█▇██████████████████████████████

0,1
eval/accuracy,0.56248
eval/loss,0.68536
eval/runtime,154.7956
eval/samples_per_second,78.168
eval/steps_per_second,9.774
total_flos,1.1459223171883008e+17
train/epoch,4.0
train/global_step,27224.0
train/grad_norm,1.49886
train/learning_rate,0.0


[34m[1mwandb[0m: Agent Starting Run: n61ln67v with config:
[34m[1mwandb[0m: 	learning_rate: 5.464557686244586e-05
[34m[1mwandb[0m: 	per_device_train_batch_size: 16


Trying to set _wandb in the hyperparameter search but there is no corresponding field in `TrainingArguments`.
Trying to set assignments in the hyperparameter search but there is no corresponding field in `TrainingArguments`.
Trying to set metric in the hyperparameter search but there is no corresponding field in `TrainingArguments`.
Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at FacebookAI/roberta-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.




Epoch,Training Loss,Validation Loss,Accuracy
1,0.6908,0.681901,0.568347
2,0.6006,0.570436,0.740661
3,0.6272,0.635529,0.657273
4,0.6412,0.632333,0.657934


0,1
eval/accuracy,▁█▅▅
eval/loss,█▁▅▅
eval/runtime,▂▁▂█
eval/samples_per_second,▇█▇▁
eval/steps_per_second,▇█▇▁
train/epoch,▁▁▂▂▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇█████
train/global_step,▁▁▁▂▂▂▂▃▃▃▃▃▃▃▄▄▄▄▄▄▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇████
train/grad_norm,█▃▃▂▂▁▁▂▃▃▂▁▂▁▂▂▁▂▂▁▂▃▃▂▄▂▁▂▂▂▁▂▃▂▂▂▃▂▁▃
train/learning_rate,████▇▇▇▇▇▆▆▆▆▆▆▅▅▅▅▅▅▄▄▄▄▄▄▄▃▃▃▃▃▂▂▂▂▂▂▁
train/loss,▃▁▄▆▃▄▄▄▇███▇▇█▇▆▇▇▇▆▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇

0,1
eval/accuracy,0.65793
eval/loss,0.63233
eval/runtime,157.98
eval/samples_per_second,76.592
eval/steps_per_second,9.577
total_flos,1.1459223171883008e+17
train/epoch,4.0
train/global_step,27224.0
train/grad_norm,2.33452
train/learning_rate,0.0


[34m[1mwandb[0m: Agent Starting Run: nxu2t2rk with config:
[34m[1mwandb[0m: 	learning_rate: 4.489344564153318e-05
[34m[1mwandb[0m: 	per_device_train_batch_size: 32


Trying to set _wandb in the hyperparameter search but there is no corresponding field in `TrainingArguments`.
Trying to set assignments in the hyperparameter search but there is no corresponding field in `TrainingArguments`.
Trying to set metric in the hyperparameter search but there is no corresponding field in `TrainingArguments`.
Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at FacebookAI/roberta-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.




Epoch,Training Loss,Validation Loss,Accuracy
1,0.2052,0.183309,0.949587
2,0.1284,0.15163,0.953719
3,0.0843,0.11611,0.963306
4,0.0604,0.107093,0.966529


0,1
eval/accuracy,▁▃▇█
eval/loss,█▅▂▁
eval/runtime,▇█▁▂
eval/samples_per_second,▂▁█▇
eval/steps_per_second,▂▁█▇
train/epoch,▁▁▂▂▂▂▃▃▃▃▃▄▄▄▄▄▅▅▅▆▆▆▆▆▇▇▇▇████
train/global_step,▁▁▂▂▂▂▃▃▃▃▃▄▄▄▄▄▅▅▅▆▆▆▆▆▇▇▇▇████
train/grad_norm,▂▂▂▁▁▁▁▂▁▁▁▂█▃▇▁▁▂▁▁▁▁▁▂▃▄▁
train/learning_rate,██▇▇▇▇▆▆▆▆▅▅▅▅▄▄▄▃▃▃▃▂▂▂▂▁▁
train/loss,█▆▅▆▆▆▅▅▄▄▄▄▃▃▃▂▂▂▂▂▂▁▁▁▁▁▁

0,1
eval/accuracy,0.96653
eval/loss,0.10709
eval/runtime,157.6586
eval/samples_per_second,76.748
eval/steps_per_second,9.597
total_flos,1.1459223171883008e+17
train/epoch,4.0
train/global_step,13612.0
train/grad_norm,0.22773
train/learning_rate,0.0


CPU times: total: 21h 30min 4s
Wall time: 23h 49min 45s


BestRun(run_id='nxu2t2rk', objective=0.9665289256198347, hyperparameters={'learning_rate': 4.489344564153318e-05, 'per_device_train_batch_size': 32, 'assignments': {}, 'metric': 'eval/loss'}, run_summary=None)

In [12]:
%%time

best_trial_rewritten = do_grid_search("rewritten")
best_trial_rewritten

  trainer = Trainer(
Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at FacebookAI/roberta-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Create sweep with ID: 5dlzufk5
Sweep URL: https://wandb.ai/inflaton-ai/uncategorized/sweeps/5dlzufk5


[34m[1mwandb[0m: Agent Starting Run: e41ulyzv with config:
[34m[1mwandb[0m: 	learning_rate: 6.500576306112583e-05
[34m[1mwandb[0m: 	per_device_train_batch_size: 32


Trying to set _wandb in the hyperparameter search but there is no corresponding field in `TrainingArguments`.
Trying to set assignments in the hyperparameter search but there is no corresponding field in `TrainingArguments`.
Trying to set metric in the hyperparameter search but there is no corresponding field in `TrainingArguments`.
Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at FacebookAI/roberta-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.




Epoch,Training Loss,Validation Loss,Accuracy
1,0.4571,0.361635,0.869421
2,0.3923,0.43281,0.855372
3,0.4284,0.666051,0.562479
4,0.7112,0.295145,0.875041


0,1
eval/accuracy,██▁█
eval/loss,▂▄█▁
eval/runtime,██▂▁
eval/samples_per_second,▁▁▇█
eval/steps_per_second,▁▁▇█
train/epoch,▁▂▂▂▃▃▄▄▄▅▅▆▆▇▇███
train/global_step,▁▂▂▂▃▃▄▄▄▅▅▆▆▇▇███
train/grad_norm,▁▃▂▁▁▂▅▂▅▅▃█▇
train/learning_rate,█▇▇▆▆▅▄▄▃▃▂▂▁
train/loss,▂▃▂▂▂▁▃▁▁▂▇▇█

0,1
eval/accuracy,0.87504
eval/loss,0.29514
eval/runtime,77.0926
eval/samples_per_second,78.477
eval/steps_per_second,9.819
total_flos,5.729611585941504e+16
train/epoch,4.0
train/global_step,6808.0
train/grad_norm,3.59975
train/learning_rate,0.0


[34m[1mwandb[0m: Agent Starting Run: dh75od1t with config:
[34m[1mwandb[0m: 	learning_rate: 7.250769260664576e-05
[34m[1mwandb[0m: 	per_device_train_batch_size: 32


Trying to set _wandb in the hyperparameter search but there is no corresponding field in `TrainingArguments`.
Trying to set assignments in the hyperparameter search but there is no corresponding field in `TrainingArguments`.
Trying to set metric in the hyperparameter search but there is no corresponding field in `TrainingArguments`.
Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at FacebookAI/roberta-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.




Epoch,Training Loss,Validation Loss,Accuracy
1,0.5071,1.611911,0.562479
2,0.5642,0.684876,0.562479
3,0.6856,0.686053,0.562479
4,0.6876,0.685326,0.562479


0,1
eval/accuracy,▁▁▁▁
eval/loss,█▁▁▁
eval/runtime,██▁▁
eval/samples_per_second,▁▁██
eval/steps_per_second,▁▁██
train/epoch,▁▂▂▂▃▃▄▄▄▅▅▆▆▇▇███
train/global_step,▁▂▂▂▃▃▄▄▄▅▅▆▆▇▇███
train/grad_norm,▄█▆▄▁▆▄▅▃▁▁▇▂
train/learning_rate,█▇▇▆▆▅▅▄▃▃▂▂▁
train/loss,▃▁▃▅▂▅███████

0,1
eval/accuracy,0.56248
eval/loss,0.68533
eval/runtime,31.9988
eval/samples_per_second,189.07
eval/steps_per_second,23.657
total_flos,5.729611585941504e+16
train/epoch,4.0
train/global_step,6808.0
train/grad_norm,0.46488
train/learning_rate,0.0


[34m[1mwandb[0m: Agent Starting Run: fe6s0fo9 with config:
[34m[1mwandb[0m: 	learning_rate: 9.049248793794865e-05
[34m[1mwandb[0m: 	per_device_train_batch_size: 32


Trying to set _wandb in the hyperparameter search but there is no corresponding field in `TrainingArguments`.
Trying to set assignments in the hyperparameter search but there is no corresponding field in `TrainingArguments`.
Trying to set metric in the hyperparameter search but there is no corresponding field in `TrainingArguments`.
Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at FacebookAI/roberta-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.




Epoch,Training Loss,Validation Loss,Accuracy
1,0.6893,0.689573,0.562645
2,0.6872,0.685463,0.562645
3,0.6855,0.685974,0.562479
4,0.6874,0.685331,0.562479


0,1
eval/accuracy,██▁▁
eval/loss,█▁▂▁
eval/runtime,▆█▅▁
eval/samples_per_second,▃▁▄█
eval/steps_per_second,▃▁▄█
train/epoch,▁▂▂▂▃▃▄▄▄▅▅▆▆▇▇███
train/global_step,▁▂▂▂▃▃▄▄▄▅▅▆▆▇▇███
train/grad_norm,▂▆▁▇▂▆▄▆▃▁▁█▂
train/learning_rate,█▇▇▆▆▅▅▄▃▃▂▂▁
train/loss,█▄▆▄▅▄▃▅▄▂▁▃▄

0,1
eval/accuracy,0.56248
eval/loss,0.68533
eval/runtime,31.9628
eval/samples_per_second,189.283
eval/steps_per_second,23.684
total_flos,5.729611585941504e+16
train/epoch,4.0
train/global_step,6808.0
train/grad_norm,0.41097
train/learning_rate,0.0


[34m[1mwandb[0m: Agent Starting Run: 4aqp9idp with config:
[34m[1mwandb[0m: 	learning_rate: 4.976592414556393e-05
[34m[1mwandb[0m: 	per_device_train_batch_size: 16


Trying to set _wandb in the hyperparameter search but there is no corresponding field in `TrainingArguments`.
Trying to set assignments in the hyperparameter search but there is no corresponding field in `TrainingArguments`.
Trying to set metric in the hyperparameter search but there is no corresponding field in `TrainingArguments`.
Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at FacebookAI/roberta-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.




Epoch,Training Loss,Validation Loss,Accuracy
1,0.6939,0.691139,0.562479
2,0.6884,0.685407,0.562479
3,0.6845,0.685643,0.562479
4,0.4192,0.406343,0.796694


0,1
eval/accuracy,▁▁▁█
eval/loss,███▁
eval/runtime,█▄▁▂
eval/samples_per_second,▁▅█▇
eval/steps_per_second,▁▅█▆
train/epoch,▁▁▂▂▂▂▃▃▃▃▃▄▄▄▄▄▅▅▅▆▆▆▆▆▇▇▇▇████
train/global_step,▁▁▂▂▂▂▃▃▃▃▃▄▄▄▄▄▅▅▅▆▆▆▆▆▇▇▇▇████
train/grad_norm,▁▂▂▂▁▃▁▃▁▂▁▁▄▂▂▂▂▁▂▂▁▂▃▂▃▂█
train/learning_rate,██▇▇▇▇▆▆▆▆▅▅▅▅▄▄▄▃▃▃▃▂▂▂▂▁▁
train/loss,▇▆▅▂▂███████████████████▄▂▁

0,1
eval/accuracy,0.79669
eval/loss,0.40634
eval/runtime,32.0623
eval/samples_per_second,188.695
eval/steps_per_second,23.61
total_flos,5.729611585941504e+16
train/epoch,4.0
train/global_step,13612.0
train/grad_norm,6.11098
train/learning_rate,0.0


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: 7wcm5o65 with config:
[34m[1mwandb[0m: 	learning_rate: 1.2493646207932298e-05
[34m[1mwandb[0m: 	per_device_train_batch_size: 16


Trying to set _wandb in the hyperparameter search but there is no corresponding field in `TrainingArguments`.
Trying to set assignments in the hyperparameter search but there is no corresponding field in `TrainingArguments`.
Trying to set metric in the hyperparameter search but there is no corresponding field in `TrainingArguments`.
Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at FacebookAI/roberta-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.




Epoch,Training Loss,Validation Loss,Accuracy
1,0.2022,0.165347,0.933388
2,0.1644,0.160493,0.942314
3,0.1263,0.23685,0.939835
4,0.1128,0.225253,0.947603


0,1
eval/accuracy,▁▅▄█
eval/loss,▁▁█▇
eval/runtime,▄▆█▁
eval/samples_per_second,▅▃▁█
eval/steps_per_second,▅▃▁█
train/epoch,▁▁▂▂▂▂▃▃▃▃▃▄▄▄▄▄▅▅▅▆▆▆▆▆▇▇▇▇████
train/global_step,▁▁▂▂▂▂▃▃▃▃▃▄▄▄▄▄▅▅▅▆▆▆▆▆▇▇▇▇████
train/grad_norm,▄▂▂▂▃▂▃▅▂▅▁▆▃▄▇▃▆▂▁▁█▇▁▆▁▁▃
train/learning_rate,██▇▇▇▇▆▆▆▆▅▅▅▅▄▄▄▃▃▃▃▂▂▂▂▁▁
train/loss,█▅▅▄▄▄▃▃▃▃▂▃▃▃▂▂▂▂▂▂▁▁▁▁▁▁▁

0,1
eval/accuracy,0.9476
eval/loss,0.22525
eval/runtime,33.0829
eval/samples_per_second,182.874
eval/steps_per_second,22.882
total_flos,5.729611585941504e+16
train/epoch,4.0
train/global_step,13612.0
train/grad_norm,10.88908
train/learning_rate,0.0


CPU times: total: 6h 23min 48s
Wall time: 7h 6min 56s


BestRun(run_id='7wcm5o65', objective=0.947603305785124, hyperparameters={'learning_rate': 1.2493646207932298e-05, 'per_device_train_batch_size': 16, 'assignments': {}, 'metric': 'eval/loss'}, run_summary=None)