In [1]:
import os
import sys
from pathlib import Path

if "workding_dir" not in locals():
    workding_dir = str(Path.cwd().parent)
os.chdir(workding_dir)
sys.path.append(workding_dir)
print("working dir:", workding_dir)

working dir: c:\Users\admin\code\CrediNews


In [2]:
from dotenv import find_dotenv, load_dotenv

found_dotenv = find_dotenv(".env")

if len(found_dotenv) == 0:
    found_dotenv = find_dotenv(".env.example")
print(f"loading env vars from: {found_dotenv}")
load_dotenv(found_dotenv, override=True)

loading env vars from: c:\Users\admin\code\CrediNews\.env


True

In [3]:
# !pip install wandb
import os
os.getenv("WANDB_API_KEY", "empty")[-5:]

'f541b'

In [4]:
import os
from transformers import DataCollatorWithPadding, AutoTokenizer, AutoModelForSequenceClassification

# Change these as needed.
model_name = "bert-base-uncased"

# Triggers tokenizer download to cache
tokenizer = AutoTokenizer.from_pretrained(model_name)
print("Downloading and caching pre-trained model")

data_collator = DataCollatorWithPadding(tokenizer=tokenizer)

# Triggers model download to cache
AutoModelForSequenceClassification.from_pretrained(model_name)


Downloading and caching pre-trained model


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


BertForSequenceClassification(
  (bert): BertModel(
    (embeddings): BertEmbeddings(
      (word_embeddings): Embedding(30522, 768, padding_idx=0)
      (position_embeddings): Embedding(512, 768)
      (token_type_embeddings): Embedding(2, 768)
      (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
      (dropout): Dropout(p=0.1, inplace=False)
    )
    (encoder): BertEncoder(
      (layer): ModuleList(
        (0-11): 12 x BertLayer(
          (attention): BertAttention(
            (self): BertSdpaSelfAttention(
              (query): Linear(in_features=768, out_features=768, bias=True)
              (key): Linear(in_features=768, out_features=768, bias=True)
              (value): Linear(in_features=768, out_features=768, bias=True)
              (dropout): Dropout(p=0.1, inplace=False)
            )
            (output): BertSelfOutput(
              (dense): Linear(in_features=768, out_features=768, bias=True)
              (LayerNorm): LayerNorm((768,), eps=1e

## Training

With everything now downloaded and cached, we can now set up our training function. Our training function defines the training execution for a single hyperparameter configuration. For now we pull these hyperparameters from a config argument, but we’ll see later how this is passed in.

First we get our datasets- we only use the first half of the dev dataset for validation, and leave the rest of testing:

In [5]:
from datasets import load_dataset, concatenate_datasets, Dataset

datasets = load_dataset(
    "csv",
    data_files={
        "train": [
            "dataset/train_data_1.csv",
            "dataset/train_data_2.csv",
            "dataset/train_data_3.csv",
            "dataset/train_data_4.csv",
        ],
        "test": "dataset/test_data.csv",
        "rewritten_train": [
            "dataset/rewritten_train_data_1.csv",
            "dataset/rewritten_train_data_2.csv",
            "dataset/rewritten_train_data_3.csv",
            "dataset/rewritten_train_data_4.csv",
        ],
        "rewritten_test": "dataset/rewritten_test_data.csv",
    },
)

# Tokenize the dataset
def tokenize_function(example):
    return tokenizer(example["processed_full_content"], padding="max_length", truncation=True)

tokenized_datasets = datasets.map(tokenize_function, batched=True)

def get_datasets(config="original"):
    if config == "rewritten":
        train_dataset = tokenized_datasets["rewritten_train"]
        eval_dataset = tokenized_datasets["rewritten_test"]
    elif config == "original":
        train_dataset = tokenized_datasets["train"]
        eval_dataset = tokenized_datasets["test"]
    else:
        train_dataset = concatenate_datasets(
            [tokenized_datasets["train"], tokenized_datasets["rewritten_train"]]
        )
        eval_dataset = concatenate_datasets(
            [tokenized_datasets["test"], tokenized_datasets["rewritten_test"]]
        )

    return train_dataset, eval_dataset

Map:   0%|          | 0/54441 [00:00<?, ? examples/s]

In [6]:
get_datasets(config="original"), get_datasets(config="rewritten"), get_datasets(config="all")

((Dataset({
      features: ['label', 'full_content', 'processed_full_content', 'input_ids', 'token_type_ids', 'attention_mask'],
      num_rows: 54441
  }),
  Dataset({
      features: ['label', 'full_content', 'processed_full_content', 'input_ids', 'token_type_ids', 'attention_mask'],
      num_rows: 6050
  })),
 (Dataset({
      features: ['label', 'full_content', 'processed_full_content', 'input_ids', 'token_type_ids', 'attention_mask'],
      num_rows: 54441
  }),
  Dataset({
      features: ['label', 'full_content', 'processed_full_content', 'input_ids', 'token_type_ids', 'attention_mask'],
      num_rows: 6050
  })),
 (Dataset({
      features: ['label', 'full_content', 'processed_full_content', 'input_ids', 'token_type_ids', 'attention_mask'],
      num_rows: 108882
  }),
  Dataset({
      features: ['label', 'full_content', 'processed_full_content', 'input_ids', 'token_type_ids', 'attention_mask'],
      num_rows: 12100
  })))

In [7]:
def wandb_hp_space(trial):
    return {
        "method": "random",
        "metric": {"name": "accuracy", "goal": "maximize"},
        "parameters": {
            "learning_rate": {"distribution": "uniform", "min": 1e-6, "max": 1e-4},
            "per_device_train_batch_size": {"values": [16, 32]},
        },
    }

In [8]:
from transformers import AutoModelForSequenceClassification
import numpy as np
import evaluate
from transformers import EvalPrediction

# Load the accuracy metric from the evaluate library
accuracy_metric = evaluate.load("accuracy")

def compute_metrics(eval_pred: EvalPrediction) -> dict:
    logits, labels = eval_pred
    predictions = np.argmax(logits, axis=-1)
    accuracy = accuracy_metric.compute(predictions=predictions, references=labels)
    return accuracy

def model_init():
    return AutoModelForSequenceClassification.from_pretrained(model_name, num_labels=2)


In [9]:
from transformers import Trainer, TrainingArguments

def do_grid_search(config):
    train_dataset, eval_dataset = get_datasets(config)
    training_args = TrainingArguments(
        output_dir="./results",
        evaluation_strategy="epoch",
        save_strategy="epoch",
        per_device_train_batch_size=8,
        per_device_eval_batch_size=8,
        num_train_epochs=4,
        weight_decay=0.01,
    )

    trainer = Trainer(
        model_init=model_init,
        args=training_args,
        train_dataset=train_dataset,
        eval_dataset=eval_dataset,
        compute_metrics=compute_metrics,
        tokenizer=tokenizer,
        data_collator=data_collator,
    )

    return trainer.hyperparameter_search(
        direction="maximize",
        backend="wandb", # or "ray", "optuna", "sigopt"
        hp_space=wandb_hp_space, #ray_hp_space, 
        n_trials=5,
    )

In [10]:
%%time

best_trial = do_grid_search("original")
best_trial

  trainer = Trainer(
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
[34m[1mwandb[0m: Using wandb-core as the SDK backend.  Please refer to https://wandb.me/wandb-core for more information.


Create sweep with ID: dzz7i5uy
Sweep URL: https://wandb.ai/inflaton-ai/uncategorized/sweeps/dzz7i5uy


[34m[1mwandb[0m: Agent Starting Run: f01ucrel with config:
[34m[1mwandb[0m: 	learning_rate: 4.5475191027464986e-05
[34m[1mwandb[0m: 	per_device_train_batch_size: 32
[34m[1mwandb[0m: Currently logged in as: [33minflaton-sg[0m ([33minflaton-ai[0m) to [32mhttps://api.wandb.ai[0m. Use [1m`wandb login --relogin`[0m to force relogin


Trying to set _wandb in the hyperparameter search but there is no corresponding field in `TrainingArguments`.
Trying to set assignments in the hyperparameter search but there is no corresponding field in `TrainingArguments`.
Trying to set metric in the hyperparameter search but there is no corresponding field in `TrainingArguments`.
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.




Epoch,Training Loss,Validation Loss,Accuracy
1,0.0485,0.033718,0.990744
2,0.024,0.031024,0.992893
3,0.0081,0.032119,0.993058
4,0.0036,0.039606,0.992893


0,1
eval/accuracy,▁███
eval/loss,▃▁▂█
eval/runtime,▄█▄▁
eval/samples_per_second,▅▁▅█
eval/steps_per_second,▅▁▅█
train/epoch,▁▂▂▂▃▃▄▄▄▅▅▆▆▇▇███
train/global_step,▁▂▂▂▃▃▄▄▄▅▅▆▆▇▇███
train/grad_norm,▁▁▂▁▁▁▁▁▁█▁▁▁
train/learning_rate,█▇▇▆▆▅▄▄▃▃▂▂▁
train/loss,█▄▃▂▂▂▂▁▁▁▁▁▁

0,1
eval/accuracy,0.99289
eval/loss,0.03961
eval/runtime,35.0222
eval/samples_per_second,172.748
eval/steps_per_second,21.615
total_flos,5.729611585941504e+16
train/epoch,4.0
train/global_step,6808.0
train/grad_norm,0.00071
train/learning_rate,0.0


[34m[1mwandb[0m: Agent Starting Run: offgdgwn with config:
[34m[1mwandb[0m: 	learning_rate: 4.6127491547304976e-05
[34m[1mwandb[0m: 	per_device_train_batch_size: 32


Trying to set _wandb in the hyperparameter search but there is no corresponding field in `TrainingArguments`.
Trying to set assignments in the hyperparameter search but there is no corresponding field in `TrainingArguments`.
Trying to set metric in the hyperparameter search but there is no corresponding field in `TrainingArguments`.
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.




Epoch,Training Loss,Validation Loss,Accuracy
1,0.0512,0.033346,0.99124
2,0.0276,0.035557,0.991901
3,0.0102,0.033677,0.993388
4,0.0028,0.0385,0.993223


0,1
eval/accuracy,▁▃█▇
eval/loss,▁▄▁█
eval/runtime,▁▃▃█
eval/samples_per_second,█▆▆▁
eval/steps_per_second,█▆▆▁
train/epoch,▁▂▂▂▃▃▄▄▄▅▅▆▆▇▇███
train/global_step,▁▂▂▂▃▃▄▄▄▅▅▆▆▇▇███
train/grad_norm,▁▁▃█▁▁▅▁▁▁▁▁▁
train/learning_rate,█▇▇▆▆▅▅▄▃▃▂▂▁
train/loss,█▄▃▃▂▂▂▁▁▁▁▁▁

0,1
eval/accuracy,0.99322
eval/loss,0.0385
eval/runtime,35.1124
eval/samples_per_second,172.304
eval/steps_per_second,21.559
total_flos,5.729611585941504e+16
train/epoch,4.0
train/global_step,6808.0
train/grad_norm,0.00219
train/learning_rate,0.0


[34m[1mwandb[0m: Agent Starting Run: jnhzqjh9 with config:
[34m[1mwandb[0m: 	learning_rate: 3.897684881994952e-05
[34m[1mwandb[0m: 	per_device_train_batch_size: 32


Trying to set _wandb in the hyperparameter search but there is no corresponding field in `TrainingArguments`.
Trying to set assignments in the hyperparameter search but there is no corresponding field in `TrainingArguments`.
Trying to set metric in the hyperparameter search but there is no corresponding field in `TrainingArguments`.
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.




Epoch,Training Loss,Validation Loss,Accuracy
1,0.049,0.048821,0.988099
2,0.0256,0.029931,0.993058
3,0.0101,0.029862,0.994711
4,0.0026,0.035875,0.994545


0,1
eval/accuracy,▁▆██
eval/loss,█▁▁▃
eval/runtime,▁▂█▂
eval/samples_per_second,█▇▁▇
eval/steps_per_second,█▇▁▇
train/epoch,▁▂▂▂▃▃▄▄▄▅▅▆▆▇▇███
train/global_step,▁▂▂▂▃▃▄▄▄▅▅▆▆▇▇███
train/grad_norm,▁▁▂▁▁▁█▁▁▁▁▁▁
train/learning_rate,█▇▇▆▆▅▅▄▃▃▂▂▁
train/loss,█▄▃▂▂▂▂▁▁▁▁▁▁

0,1
eval/accuracy,0.99455
eval/loss,0.03588
eval/runtime,35.1237
eval/samples_per_second,172.248
eval/steps_per_second,21.552
total_flos,5.729611585941504e+16
train/epoch,4.0
train/global_step,6808.0
train/grad_norm,0.00064
train/learning_rate,0.0


[34m[1mwandb[0m: Agent Starting Run: udayicr4 with config:
[34m[1mwandb[0m: 	learning_rate: 7.952574230669924e-06
[34m[1mwandb[0m: 	per_device_train_batch_size: 32


Trying to set _wandb in the hyperparameter search but there is no corresponding field in `TrainingArguments`.
Trying to set assignments in the hyperparameter search but there is no corresponding field in `TrainingArguments`.
Trying to set metric in the hyperparameter search but there is no corresponding field in `TrainingArguments`.
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.




Epoch,Training Loss,Validation Loss,Accuracy
1,0.0561,0.04722,0.98843
2,0.0312,0.03789,0.990248
3,0.0189,0.036003,0.991405
4,0.0096,0.035474,0.992231


0,1
eval/accuracy,▁▄▆█
eval/loss,█▂▁▁
eval/runtime,█▁▇▃
eval/samples_per_second,▁█▂▆
eval/steps_per_second,▁█▂▆
train/epoch,▁▂▂▂▃▃▄▄▄▅▅▆▆▇▇███
train/global_step,▁▂▂▂▃▃▄▄▄▅▅▆▆▇▇███
train/grad_norm,▂▁▄█▁▁▆▁▁▁▁▁▁
train/learning_rate,█▇▇▆▆▅▅▄▃▃▂▂▁
train/loss,█▃▃▂▂▂▂▁▁▁▁▁▁

0,1
eval/accuracy,0.99223
eval/loss,0.03547
eval/runtime,34.9803
eval/samples_per_second,172.954
eval/steps_per_second,21.641
total_flos,5.729611585941504e+16
train/epoch,4.0
train/global_step,6808.0
train/grad_norm,0.00733
train/learning_rate,0.0


[34m[1mwandb[0m: Agent Starting Run: 2q74m15a with config:
[34m[1mwandb[0m: 	learning_rate: 3.900009239754426e-06
[34m[1mwandb[0m: 	per_device_train_batch_size: 16


Trying to set _wandb in the hyperparameter search but there is no corresponding field in `TrainingArguments`.
Trying to set assignments in the hyperparameter search but there is no corresponding field in `TrainingArguments`.
Trying to set metric in the hyperparameter search but there is no corresponding field in `TrainingArguments`.
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.




Epoch,Training Loss,Validation Loss,Accuracy
1,0.0722,0.056308,0.986942
2,0.0431,0.047212,0.990413
3,0.0253,0.05028,0.990579
4,0.014,0.041323,0.992727


0,1
eval/accuracy,▁▅▅█
eval/loss,█▄▅▁
eval/runtime,█▅▆▁
eval/samples_per_second,▁▄▃█
eval/steps_per_second,▁▄▃█
train/epoch,▁▁▂▂▂▂▃▃▃▃▃▄▄▄▄▄▅▅▅▆▆▆▆▆▇▇▇▇████
train/global_step,▁▁▂▂▂▂▃▃▃▃▃▄▄▄▄▄▅▅▅▆▆▆▆▆▇▇▇▇████
train/grad_norm,▂▁▁▁▁▂▁▄▁▁▁▁▁▁▁▁▁▁▁▁▁█▁▁▁▁▁
train/learning_rate,██▇▇▇▇▆▆▆▆▅▅▅▅▄▄▄▃▃▃▃▂▂▂▂▁▁
train/loss,█▃▃▂▂▂▂▂▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁

0,1
eval/accuracy,0.99273
eval/loss,0.04132
eval/runtime,35.1185
eval/samples_per_second,172.274
eval/steps_per_second,21.556
total_flos,5.729611585941504e+16
train/epoch,4.0
train/global_step,13612.0
train/grad_norm,0.015
train/learning_rate,0.0


CPU times: total: 4h 18min 53s
Wall time: 4h 50min 4s


BestRun(run_id='jnhzqjh9', objective=0.9945454545454545, hyperparameters={'learning_rate': 3.897684881994952e-05, 'per_device_train_batch_size': 32, 'assignments': {}, 'metric': 'eval/loss'}, run_summary=None)

In [10]:
%%time

best_trial_combined = do_grid_search("combined")
best_trial_combined

  trainer = Trainer(
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
[34m[1mwandb[0m: Using wandb-core as the SDK backend.  Please refer to https://wandb.me/wandb-core for more information.


Create sweep with ID: su7a4p7b
Sweep URL: https://wandb.ai/inflaton-ai/uncategorized/sweeps/su7a4p7b


[34m[1mwandb[0m: Agent Starting Run: 8fhrlx1u with config:
[34m[1mwandb[0m: 	learning_rate: 9.674440968778798e-06
[34m[1mwandb[0m: 	per_device_train_batch_size: 32
[34m[1mwandb[0m: Currently logged in as: [33minflaton-sg[0m ([33minflaton-ai[0m) to [32mhttps://api.wandb.ai[0m. Use [1m`wandb login --relogin`[0m to force relogin


Trying to set _wandb in the hyperparameter search but there is no corresponding field in `TrainingArguments`.
Trying to set assignments in the hyperparameter search but there is no corresponding field in `TrainingArguments`.
Trying to set metric in the hyperparameter search but there is no corresponding field in `TrainingArguments`.
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.




Epoch,Training Loss,Validation Loss,Accuracy
1,0.1224,0.112944,0.956612
2,0.0872,0.109373,0.961653
3,0.0646,0.13729,0.962727
4,0.0437,0.145524,0.962231


0,1
eval/accuracy,▁▇█▇
eval/loss,▂▁▆█
eval/runtime,▁▄▄█
eval/samples_per_second,█▅▅▁
eval/steps_per_second,█▅▅▁
train/epoch,▁▁▂▂▂▂▃▃▃▃▃▄▄▄▄▄▅▅▅▆▆▆▆▆▇▇▇▇████
train/global_step,▁▁▂▂▂▂▃▃▃▃▃▄▄▄▄▄▅▅▅▆▆▆▆▆▇▇▇▇████
train/grad_norm,▂▂▂▁▂▂▂▂▂▂▂▁▃▃▃▃▁▃▂▁█▁▃▃▂▄▃
train/learning_rate,██▇▇▇▇▆▆▆▆▅▅▅▅▄▄▄▃▃▃▃▂▂▂▂▁▁
train/loss,█▄▄▃▃▃▃▂▂▂▂▂▂▂▂▂▂▁▂▂▁▁▁▁▁▁▁

0,1
eval/accuracy,0.96223
eval/loss,0.14552
eval/runtime,167.9102
eval/samples_per_second,72.062
eval/steps_per_second,9.011
total_flos,1.1459223171883008e+17
train/epoch,4.0
train/global_step,13612.0
train/grad_norm,11.81995
train/learning_rate,0.0


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: u7685nf5 with config:
[34m[1mwandb[0m: 	learning_rate: 7.397204197231888e-05
[34m[1mwandb[0m: 	per_device_train_batch_size: 16


Trying to set _wandb in the hyperparameter search but there is no corresponding field in `TrainingArguments`.
Trying to set assignments in the hyperparameter search but there is no corresponding field in `TrainingArguments`.
Trying to set metric in the hyperparameter search but there is no corresponding field in `TrainingArguments`.
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.




Epoch,Training Loss,Validation Loss,Accuracy
1,0.688,0.685526,0.562479
2,0.6847,0.685383,0.562479
3,0.6878,0.689068,0.562479
4,0.6885,0.685433,0.562479


0,1
eval/accuracy,▁▁▁▁
eval/loss,▁▁█▁
eval/runtime,█▄▁▁
eval/samples_per_second,▁▅██
eval/steps_per_second,▁▅██
train/epoch,▁▁▂▂▂▂▃▃▃▃▃▃▃▄▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇█████
train/global_step,▁▁▁▁▂▂▂▂▂▂▃▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▅▆▆▆▆▇▇▇▇███
train/grad_norm,▃█▃▄▂▁▂▁▁▁▂▁▄▁▃▃▂▁▂▂▆▁▂▂▁▁▂▂▃▁▂▃▃▂▁▁▂▂▁▂
train/learning_rate,███▇▇▇▇▆▆▆▆▆▅▅▅▅▅▅▄▄▄▄▄▄▄▃▃▃▃▃▂▂▂▂▂▂▂▁▁▁
train/loss,▁███████████████████████████████████████

0,1
eval/accuracy,0.56248
eval/loss,0.68543
eval/runtime,160.7158
eval/samples_per_second,75.288
eval/steps_per_second,9.414
total_flos,1.1459223171883008e+17
train/epoch,4.0
train/global_step,27224.0
train/grad_norm,3.02784
train/learning_rate,0.0


[34m[1mwandb[0m: Agent Starting Run: xo3iqeep with config:
[34m[1mwandb[0m: 	learning_rate: 2.1205357854158003e-05
[34m[1mwandb[0m: 	per_device_train_batch_size: 32


Trying to set _wandb in the hyperparameter search but there is no corresponding field in `TrainingArguments`.
Trying to set assignments in the hyperparameter search but there is no corresponding field in `TrainingArguments`.
Trying to set metric in the hyperparameter search but there is no corresponding field in `TrainingArguments`.
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.




Epoch,Training Loss,Validation Loss,Accuracy
1,0.1142,0.110435,0.958926
2,0.0779,0.110411,0.963802
3,0.047,0.135768,0.965372
4,0.0221,0.15842,0.967521


0,1
eval/accuracy,▁▅▆█
eval/loss,▁▁▅█
eval/runtime,▁▅█▆
eval/samples_per_second,█▄▁▃
eval/steps_per_second,█▄▁▃
train/epoch,▁▁▂▂▂▂▃▃▃▃▃▄▄▄▄▄▅▅▅▆▆▆▆▆▇▇▇▇████
train/global_step,▁▁▂▂▂▂▃▃▃▃▃▄▄▄▄▄▅▅▅▆▆▆▆▆▇▇▇▇████
train/grad_norm,▂▃▂▂▂▂▂▂▅▂▃▁▅▅▁▃▁▃█▁▇▁▃▆▃▁▁
train/learning_rate,██▇▇▇▇▆▆▆▆▅▅▅▅▄▄▄▃▃▃▃▂▂▂▂▁▁
train/loss,█▅▄▄▄▄▄▃▃▃▃▃▃▂▂▂▂▂▂▂▁▁▁▁▁▁▁

0,1
eval/accuracy,0.96752
eval/loss,0.15842
eval/runtime,168.2775
eval/samples_per_second,71.905
eval/steps_per_second,8.991
total_flos,1.1459223171883008e+17
train/epoch,4.0
train/global_step,13612.0
train/grad_norm,0.02897
train/learning_rate,0.0


[34m[1mwandb[0m: Agent Starting Run: lnaql1c7 with config:
[34m[1mwandb[0m: 	learning_rate: 2.304693588855464e-05
[34m[1mwandb[0m: 	per_device_train_batch_size: 16


Trying to set _wandb in the hyperparameter search but there is no corresponding field in `TrainingArguments`.
Trying to set assignments in the hyperparameter search but there is no corresponding field in `TrainingArguments`.
Trying to set metric in the hyperparameter search but there is no corresponding field in `TrainingArguments`.
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.




Epoch,Training Loss,Validation Loss,Accuracy
1,0.1283,0.129828,0.960413
2,0.1017,0.175993,0.959669
3,0.0596,0.164423,0.966364
4,0.0266,0.198734,0.965289


0,1
eval/accuracy,▂▁█▇
eval/loss,▁▆▅█
eval/runtime,█▂▄▁
eval/samples_per_second,▁▇▅█
eval/steps_per_second,▁▇▅█
train/epoch,▁▁▁▁▂▂▂▂▂▂▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▆▆▆▆▆▆▆▇▇▇▇▇███
train/global_step,▁▁▁▁▂▂▂▂▂▂▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇█████
train/grad_norm,▂▃▃▃▂▂▁▅▁▁▅▁▁▁▄█▃▁▁▁▁▄▁▆▁▁▁▁▁▁▁▁▁▁▁▁▃▁▁▁
train/learning_rate,███▇▇▇▇▇▇▆▆▆▆▆▆▅▅▅▅▅▄▄▄▄▄▄▃▃▃▃▃▂▂▂▂▂▂▁▁▁
train/loss,██▇▇▆▆▆▆▆▅▅▄▄▄▄▅▄▄▄▄▂▃▃▃▃▃▂▃▃▂▂▁▁▁▁▁▁▁▁▁

0,1
eval/accuracy,0.96529
eval/loss,0.19873
eval/runtime,166.7642
eval/samples_per_second,72.558
eval/steps_per_second,9.073
total_flos,1.1459223171883008e+17
train/epoch,4.0
train/global_step,27224.0
train/grad_norm,0.04917
train/learning_rate,0.0


[34m[1mwandb[0m: Agent Starting Run: yanduh28 with config:
[34m[1mwandb[0m: 	learning_rate: 7.189841685131387e-05
[34m[1mwandb[0m: 	per_device_train_batch_size: 16


Trying to set _wandb in the hyperparameter search but there is no corresponding field in `TrainingArguments`.
Trying to set assignments in the hyperparameter search but there is no corresponding field in `TrainingArguments`.
Trying to set metric in the hyperparameter search but there is no corresponding field in `TrainingArguments`.
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.




Epoch,Training Loss,Validation Loss,Accuracy
1,0.6957,0.68609,0.562479
2,0.6859,0.685375,0.562479
3,0.6847,0.312274,0.901322
4,0.218,0.206514,0.931488


0,1
eval/accuracy,▁▁▇█
eval/loss,██▃▁
eval/runtime,▃▁█▅
eval/samples_per_second,▆█▁▄
eval/steps_per_second,▆█▁▃
train/epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇█████
train/global_step,▁▁▁▁▂▂▂▂▂▂▃▃▃▃▃▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇▇███
train/grad_norm,▁▇▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▂▁▁▁▁▁▁▁▁▁▂▁▁▁▁▂▁█▂
train/learning_rate,███▇▇▇▇▇▆▆▆▆▆▆▅▅▅▅▅▅▄▄▄▄▄▄▃▃▃▃▃▂▂▂▂▂▂▂▁▁
train/loss,▃▂▅████████████████████████████▇██████▇▁

0,1
eval/accuracy,0.93149
eval/loss,0.20651
eval/runtime,162.6111
eval/samples_per_second,74.411
eval/steps_per_second,9.304
total_flos,1.1459223171883008e+17
train/epoch,4.0
train/global_step,27224.0
train/grad_norm,9.55706
train/learning_rate,0.0


CPU times: total: 21h 24min 58s
Wall time: 23h 42min 24s


BestRun(run_id='xo3iqeep', objective=0.9675206611570248, hyperparameters={'learning_rate': 2.1205357854158003e-05, 'per_device_train_batch_size': 32, 'assignments': {}, 'metric': 'eval/loss'}, run_summary=None)

In [11]:
%%time

best_trial_rewritten = do_grid_search("rewritten")
best_trial_rewritten

  trainer = Trainer(
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Create sweep with ID: 3ih8nbf0
Sweep URL: https://wandb.ai/inflaton-ai/uncategorized/sweeps/3ih8nbf0


[34m[1mwandb[0m: Agent Starting Run: 809zjbz0 with config:
[34m[1mwandb[0m: 	learning_rate: 7.792192157362349e-05
[34m[1mwandb[0m: 	per_device_train_batch_size: 16


Trying to set _wandb in the hyperparameter search but there is no corresponding field in `TrainingArguments`.
Trying to set assignments in the hyperparameter search but there is no corresponding field in `TrainingArguments`.
Trying to set metric in the hyperparameter search but there is no corresponding field in `TrainingArguments`.
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.




Epoch,Training Loss,Validation Loss,Accuracy
1,0.6936,0.695958,0.437521
2,0.6866,0.68707,0.562479
3,0.6844,0.685715,0.562479
4,0.6843,0.685416,0.562479


0,1
eval/accuracy,▁███
eval/loss,█▂▁▁
eval/runtime,▅▄▁█
eval/samples_per_second,▄▅█▁
eval/steps_per_second,▄▅█▁
train/epoch,▁▁▂▂▂▂▃▃▃▃▃▄▄▄▄▄▅▅▅▆▆▆▆▆▇▇▇▇████
train/global_step,▁▁▂▂▂▂▃▃▃▃▃▄▄▄▄▄▅▅▅▆▆▆▆▆▇▇▇▇████
train/grad_norm,▂▃▂█▂▂▂▃▁▂▂▁▄▃▁▁▁▁▂▁▁▂▂▁▁▃▁
train/learning_rate,██▇▇▇▇▆▆▆▆▅▅▅▄▄▄▄▃▃▃▃▂▂▂▂▁▁
train/loss,▃▁▃████████████████████████

0,1
eval/accuracy,0.56248
eval/loss,0.68542
eval/runtime,80.3402
eval/samples_per_second,75.305
eval/steps_per_second,9.422
total_flos,5.729611585941504e+16
train/epoch,4.0
train/global_step,13612.0
train/grad_norm,0.79321
train/learning_rate,0.0


[34m[1mwandb[0m: Agent Starting Run: 4sxiorjl with config:
[34m[1mwandb[0m: 	learning_rate: 4.7796518861039814e-05
[34m[1mwandb[0m: 	per_device_train_batch_size: 32


Trying to set _wandb in the hyperparameter search but there is no corresponding field in `TrainingArguments`.
Trying to set assignments in the hyperparameter search but there is no corresponding field in `TrainingArguments`.
Trying to set metric in the hyperparameter search but there is no corresponding field in `TrainingArguments`.
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.




Epoch,Training Loss,Validation Loss,Accuracy
1,0.22,0.209388,0.910579
2,0.1927,0.231465,0.925124
3,0.1337,0.208961,0.933388
4,0.1026,0.20251,0.937025


0,1
eval/accuracy,▁▅▇█
eval/loss,▃█▃▁
eval/runtime,▁▅█▇
eval/samples_per_second,█▄▁▂
eval/steps_per_second,█▄▁▂
train/epoch,▁▂▂▂▃▃▄▄▄▅▅▆▆▇▇███
train/global_step,▁▂▂▂▃▃▄▄▄▅▅▆▆▇▇███
train/grad_norm,▁▄▃▂▅▃▁█▇▂▁█▇
train/learning_rate,█▇▇▆▆▅▄▄▃▃▂▂▁
train/loss,█▅▅▃▃▄▄▃▂▂▁▁▁

0,1
eval/accuracy,0.93702
eval/loss,0.20251
eval/runtime,83.8106
eval/samples_per_second,72.187
eval/steps_per_second,9.032
total_flos,5.729611585941504e+16
train/epoch,4.0
train/global_step,6808.0
train/grad_norm,18.84899
train/learning_rate,0.0


[34m[1mwandb[0m: Agent Starting Run: rh7989hr with config:
[34m[1mwandb[0m: 	learning_rate: 7.346522135775748e-05
[34m[1mwandb[0m: 	per_device_train_batch_size: 32


Trying to set _wandb in the hyperparameter search but there is no corresponding field in `TrainingArguments`.
Trying to set assignments in the hyperparameter search but there is no corresponding field in `TrainingArguments`.
Trying to set metric in the hyperparameter search but there is no corresponding field in `TrainingArguments`.
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.




Epoch,Training Loss,Validation Loss,Accuracy
1,0.3937,0.499309,0.785455
2,0.3531,0.375494,0.865289
3,0.2635,0.255872,0.914215
4,0.1849,0.191771,0.923471


0,1
eval/accuracy,▁▅██
eval/loss,█▅▂▁
eval/runtime,█▅▄▁
eval/samples_per_second,▁▄▅█
eval/steps_per_second,▁▄▅█
train/epoch,▁▂▂▂▃▃▄▄▄▅▅▆▆▇▇███
train/global_step,▁▂▂▂▃▃▄▄▄▅▅▆▆▇▇███
train/grad_norm,▅▂▁▁▂▁▁▃█▂▂▁▂
train/learning_rate,█▇▇▆▆▅▅▄▃▃▂▂▁
train/loss,█▇▆█▆▅▆▄▃▃▂▁▁

0,1
eval/accuracy,0.92347
eval/loss,0.19177
eval/runtime,82.2715
eval/samples_per_second,73.537
eval/steps_per_second,9.201
total_flos,5.729611585941504e+16
train/epoch,4.0
train/global_step,6808.0
train/grad_norm,4.47612
train/learning_rate,0.0


[34m[1mwandb[0m: Agent Starting Run: 5zp8fuhk with config:
[34m[1mwandb[0m: 	learning_rate: 9.139473381529488e-05
[34m[1mwandb[0m: 	per_device_train_batch_size: 32


Trying to set _wandb in the hyperparameter search but there is no corresponding field in `TrainingArguments`.
Trying to set assignments in the hyperparameter search but there is no corresponding field in `TrainingArguments`.
Trying to set metric in the hyperparameter search but there is no corresponding field in `TrainingArguments`.
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.




Epoch,Training Loss,Validation Loss,Accuracy
1,0.6907,0.688417,0.562479
2,0.6873,0.687366,0.562479
3,0.6853,0.686024,0.562479
4,0.6873,0.685433,0.562479


0,1
eval/accuracy,▁▁▁▁
eval/loss,█▆▂▁
eval/runtime,▃▁█▃
eval/samples_per_second,▆█▁▆
eval/steps_per_second,▆█▁▆
train/epoch,▁▂▂▂▃▃▄▄▄▅▅▆▆▇▇███
train/global_step,▁▂▂▂▃▃▄▄▄▅▅▆▆▇▇███
train/grad_norm,█▇▂▃▂▃▂▃▃▂▁▄▂
train/learning_rate,█▇▇▆▆▅▄▄▃▃▂▂▁
train/loss,▁▇███████████

0,1
eval/accuracy,0.56248
eval/loss,0.68543
eval/runtime,82.0778
eval/samples_per_second,73.711
eval/steps_per_second,9.223
total_flos,5.729611585941504e+16
train/epoch,4.0
train/global_step,6808.0
train/grad_norm,1.02793
train/learning_rate,0.0


[34m[1mwandb[0m: Agent Starting Run: dlasjxhr with config:
[34m[1mwandb[0m: 	learning_rate: 4.914667895827951e-05
[34m[1mwandb[0m: 	per_device_train_batch_size: 16


Trying to set _wandb in the hyperparameter search but there is no corresponding field in `TrainingArguments`.
Trying to set assignments in the hyperparameter search but there is no corresponding field in `TrainingArguments`.
Trying to set metric in the hyperparameter search but there is no corresponding field in `TrainingArguments`.
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.




Epoch,Training Loss,Validation Loss,Accuracy
1,0.3739,0.71754,0.437521
2,0.3156,0.304085,0.900992
3,0.2434,0.265917,0.919669
4,0.2139,0.255113,0.922645


0,1
eval/accuracy,▁███
eval/loss,█▂▁▁
eval/runtime,▄▁█▇
eval/samples_per_second,▅█▁▂
eval/steps_per_second,▅█▁▂
train/epoch,▁▁▂▂▂▂▃▃▃▃▃▄▄▄▄▄▅▅▅▆▆▆▆▆▇▇▇▇████
train/global_step,▁▁▂▂▂▂▃▃▃▃▃▄▄▄▄▄▅▅▅▆▆▆▆▆▇▇▇▇████
train/grad_norm,▁▁▂▁▁▁▁█▁▁▁▁▁▁▁▄▁▁▁▁▁▁▁▁▁▁▁
train/learning_rate,██▇▇▇▇▆▆▆▆▅▅▅▅▄▄▄▃▃▃▃▂▂▂▂▁▁
train/loss,▃▂▂▃▃▃█▇▃▄▄▅▂▂▂▂▂▂▂▁▁▁▂▁▁▁▁

0,1
eval/accuracy,0.92264
eval/loss,0.25511
eval/runtime,82.4405
eval/samples_per_second,73.386
eval/steps_per_second,9.182
total_flos,5.729611585941504e+16
train/epoch,4.0
train/global_step,13612.0
train/grad_norm,5.62292
train/learning_rate,0.0


CPU times: total: 10h 46min 12s
Wall time: 11h 50min 41s


BestRun(run_id='4sxiorjl', objective=0.9370247933884297, hyperparameters={'learning_rate': 4.7796518861039814e-05, 'per_device_train_batch_size': 32, 'assignments': {}, 'metric': 'eval/loss'}, run_summary=None)