In [1]:
import sys

# Put the project's source directory on the import path (presumably where the
# `model`, `pl_model` and `utils` modules imported below live).  The membership
# check keeps the cell idempotent: re-running it no longer stacks duplicate
# entries onto sys.path.
_SRC_DIR = '/workspace/brainbias/src'
if _SRC_DIR not in sys.path:
    sys.path.insert(0, _SRC_DIR)


In [2]:
from os import environ

from torch.utils.data import DataLoader
from transformers import AutoTokenizer, AutoModel 
import lightning.pytorch as pl

from model import BERT 
from pl_model import LitBert

from utils.EthicsDataset import EthicsDataset
from utils.loading_data import multiple_dataset_loading, DEFAULT_DATASETS
from utils.constants import Sampling

import wandb
from lightning.pytorch.loggers import WandbLogger

In [3]:
from transformers import AutoModelForMaskedLM, RobertaTokenizer, RobertaModel, BertForSequenceClassification 

# Checkpoint shared by the tokenizer and the encoder.  Naming it once keeps
# the two from drifting apart (a tokenizer/model vocabulary mismatch would not
# fail loudly).  Keep this in sync with context['checkpoint'] further down.
CHECKPOINT_NAME = "bert-base-cased"

# Alternatives that were tried; to use one, replace the two active assignments
# at the bottom of this cell.

# DeBERTa
# tokenizer = AutoTokenizer.from_pretrained("microsoft/deberta-v3-base")
# model = AutoModel.from_pretrained("microsoft/deberta-v3-base")

# BERT - uncased
# tokenizer = AutoTokenizer.from_pretrained("bert-base-uncased")
# model = AutoModelForMaskedLM.from_pretrained("bert-base-uncased")

# RoBERTa
# tokenizer = RobertaTokenizer.from_pretrained('roberta-large')
# model = RobertaModel.from_pretrained('roberta-large')

# BERT - cased (active)
tokenizer = AutoTokenizer.from_pretrained(CHECKPOINT_NAME)
model = AutoModel.from_pretrained(CHECKPOINT_NAME)


In [4]:
# GPU housekeeping / diagnostics cell.
# GPUtil is an optional third-party package (install with `%pip install gputil`).
import torch

# Release cached-but-unused CUDA memory held by PyTorch's allocator so the
# utilization report below is not inflated by earlier cells.
torch.cuda.empty_cache()

# The utilization table is purely informational, so a missing GPUtil install
# should not abort the notebook.
try:
    import GPUtil
except ImportError:
    print('GPUtil is not installed; skipping GPU utilization report.')
else:
    GPUtil.showUtilization()



| ID | GPU | MEM |
------------------
|  0 |  0% | 37% |


In [5]:
    
# Single fine-tuning run with fixed hyperparameters, logged to Weights & Biases.
logger = WandbLogger()

# Run configuration: handed to the data loading helper, to LitBert and to both
# dataset constructors, and logged as the run's hyperparameters.
context = {'artifactspath': '/workspace/brainbias/artifacts',
            'batch_size': 10,
            'batches_per_epoch': 20,
            'checkpoint': 'bert-base-cased',
            'datapath': '/workspace/brainbias/data',
            'loss_names': ['cross-entropy', 'mse'],
            'loss_weights': [1.0, 1.0],
            'lr': 0.0006538379548447884,
            'num_epochs': 4,
            'num_samples_test':  1500,
            'num_samples_train': 1000,
            'only_train_head': False,
            'regularization_coef': 0.1,
            'regularize_from_init': False,
            'sampling_method': Sampling.LAST, # <Sampling.LAST: 1>,
            'shuffle_test': False,
            'shuffle_train': True,
            'train_datasets': ['EthicsDataset', 'DS000212_LFB_Dataset']}

dataloaders, train_head_dims = multiple_dataset_loading(tokenizer, context)

# Wrap the pretrained encoder from the model-selection cell with one head per
# training dataset (head sizes come from the data loading helper).
lit_model = LitBert(BERT(model, head_dims=train_head_dims), context)

try:
    # logger = WandbLogger(save_dir=context['artifactspath'], project="AISC_BB")
    logger.log_hyperparams(context)

    # train the model
    trainer = pl.Trainer(
        limit_train_batches=context['batches_per_epoch'],
        max_epochs=context['num_epochs'],
        accelerator='auto',
        devices='auto',
        strategy='auto',
        logger=logger,
        log_every_n_steps=1,
        default_root_dir=context['artifactspath'],
        enable_checkpointing=False  # Avoid saving full model into a disk (GBs)
    )

    print('Fine tuning BERT...')
    trainer.fit(lit_model, dataloaders)

    # Evaluate on the held-out ETHICS split only.
    data = EthicsDataset(context, tokenizer, is_train=False)
    test_loader = DataLoader(data, batch_size=context['batch_size'], shuffle=context['shuffle_test'])

    print('Testing on ETHICS...')
    trainer.test(lit_model, dataloaders=test_loader)
    logger.save()
finally:
    # Always close the W&B run, even when training/testing raises; otherwise
    # the half-finished run stays active in the kernel and leaks into whatever
    # cell calls wandb next.
    wandb.finish()

AssertionError: 

In [5]:
def wandb_sweep():
    """Run one sweep trial: fine-tune a fresh BERT encoder and test it on ETHICS.

    Meant to be passed as ``function`` to ``wandb.agent``.  The trial's
    hyperparameters are read from ``wandb.config``: ``batch_size`` and ``lr``
    go into the run ``context``; ``batches_per_epoch`` and ``num_epochs``
    configure the Lightning ``Trainer``.  Everything else in ``context`` is
    fixed.

    Uses the notebook-level ``tokenizer``; the encoder itself is reloaded from
    the pretrained checkpoint on every call so trials do not share weights.

    Returns:
        None.  Metrics are reported through the W&B logger.
    """
    wandb.init()

    try:
        logger = WandbLogger()

        context = {'artifactspath': '/workspace/brainbias/artifacts',
                   'batch_size': wandb.config.batch_size, # 15
                   # 'batches_per_epoch': 15
                   'checkpoint': 'bert-base-cased',
                   'datapath': '/workspace/brainbias/data',
                   'loss_names': ['cross-entropy', 'mse'],
                   'loss_weights': [1.0, 1.0],
                   'lr': wandb.config.lr, # 0.0006538379548447884,
                   # 'num_epochs': 4,
                   'num_samples_test':  1500, # 3885,
                   'num_samples_train': 1000, # 13911,
                   'only_train_head': False,
                   'regularization_coef': 0.1,
                   'regularize_from_init': False,
                   'sampling_method': Sampling.LAST, # <Sampling.LAST: 1>,
                   'shuffle_test': False,
                   'shuffle_train': True,
                   'train_datasets': ['EthicsDataset', 'DS000212_LFB_Dataset']}

        dataloaders, train_head_dims = multiple_dataset_loading(tokenizer, context)

        # Fresh pretrained weights per trial (local name; does not touch the
        # notebook-level `model`).
        model = AutoModel.from_pretrained("bert-base-cased")
        lit_model = LitBert(BERT(model, head_dims=train_head_dims), context)

        # logger = WandbLogger(save_dir=context['artifactspath'], project="AISC_BB")
        logger.log_hyperparams(context)

        # train the model
        trainer = pl.Trainer(
            limit_train_batches=wandb.config.batches_per_epoch,
            max_epochs=wandb.config.num_epochs,
            accelerator='auto',
            devices='auto',
            strategy='auto',
            logger=logger,
            log_every_n_steps=1,
            default_root_dir=context['artifactspath'],
            # Was True, contradicting the stated intent and the single-run
            # cell: with checkpointing on, the model is written to disk for
            # every trial of the sweep.
            enable_checkpointing=False  # Avoid saving full model into a disk (GBs)
        )

        print('Fine tuning BERT...')
        trainer.fit(lit_model, dataloaders)

        # Evaluate on the held-out ETHICS split only.
        data = EthicsDataset(context, tokenizer, is_train=False)
        test_loader = DataLoader(data, batch_size=context['batch_size'], shuffle=context['shuffle_test'])

        print('Testing on ETHICS...')
        trainer.test(lit_model, dataloaders=test_loader)
        logger.save()
    finally:
        # Close the run even if the trial fails, so the next trial started by
        # the agent does not inherit a still-active run.
        wandb.finish()

In [6]:
# Search space for the sweep.  Only `lr` actually varies (uniform between 1e-5
# and 5e-5); the other entries are single-valued lists, i.e. held constant.
parameters_dict = {
    # 'model': {'value': model},
    'batch_size': {'values': [12]},
    'batches_per_epoch': {'values': [19]},
    'lr': {
        'distribution': 'uniform',
        'min': 1e-5,
        'max': 5e-5,
    },
    'num_epochs': {'values': [250]},
}

# Random search over the space above.
sweep_config = {
    'method': 'random',
    'parameters': parameters_dict,
}

# Register the sweep with W&B, then run trials in this kernel: the agent calls
# wandb_sweep() once per trial, with count=100 as the trial limit.
sweep_id = wandb.sweep(sweep_config, project="Seong-AISC_BB")
wandb.agent(sweep_id, function=wandb_sweep, count=100)


Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


Create sweep with ID: 9jxs9dnv
Sweep URL: https://wandb.ai/asdfasdfasdfdsafsd/Seong-AISC_BB/sweeps/9jxs9dnv


[34m[1mwandb[0m: Agent Starting Run: ikzewm7q with config:
[34m[1mwandb[0m: 	batch_size: 12
[34m[1mwandb[0m: 	batches_per_epoch: 19
[34m[1mwandb[0m: 	lr: 3.984818777527335e-05
[34m[1mwandb[0m: 	num_epochs: 250
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.
[34m[1mwandb[0m: Currently logged in as: [33mseonghahcho[0m ([33masdfasdfasdfdsafsd[0m). Use [1m`wandb login --relogin`[0m to force relogin


  rank_zero_warn(
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs


Fine tuning BERT...


  rank_zero_warn(
You are using a CUDA device ('NVIDIA RTX A4000') that has Tensor Cores. To properly utilize them, you should set `torch.set_float32_matmul_precision('medium' | 'high')` which will trade-off precision for performance. For more details, read https://pytorch.org/docs/stable/generated/torch.set_float32_matmul_precision.html#torch.set_float32_matmul_precision
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name  | Type | Params
-------------------------------
0 | model | BERT | 109 M 
-------------------------------
109 M     Trainable params
0         Non-trainable params
109 M     Total params
436.397   Total estimated model params size (MB)
  rank_zero_warn(


Training: 0it [00:00, ?it/s]

`Trainer.fit` stopped: `max_epochs=250` reached.


Testing on ETHICS...


LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
  rank_zero_warn(


Testing: 0it [00:00, ?it/s]



0,1
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███
test_acc,▁
train_loss,█▅▄▄▂▂▂▂▃▂▂▂▂▂▂▂▁▁▂▂▂▁▂▂▁▂▁▁▂▁▁▁▁▂▁▂▁▁▂▁
trainer/global_step,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███

0,1
epoch,250.0
test_acc,0.66867
train_loss,0.81836
trainer/global_step,4750.0


[34m[1mwandb[0m: Agent Starting Run: r2mnns9n with config:
[34m[1mwandb[0m: 	batch_size: 12
[34m[1mwandb[0m: 	batches_per_epoch: 19
[34m[1mwandb[0m: 	lr: 3.5456057808640376e-05
[34m[1mwandb[0m: 	num_epochs: 250
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


  rank_zero_warn(
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
  rank_zero_warn(
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name  | Type | Params
-------------------------------
0 | model | BERT | 109 M 
-------------------------------
109 M     Trainable params
0         Non-trainable params
109 M     Total params
436.397   Total estimated model params size (MB)


Fine tuning BERT...


  rank_zero_warn(


Training: 0it [00:00, ?it/s]

`Trainer.fit` stopped: `max_epochs=250` reached.


Testing on ETHICS...


LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
  rank_zero_warn(


Testing: 0it [00:00, ?it/s]



0,1
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███
test_acc,▁
train_loss,█▅▄▅▃▃▂▂▃▂▂▂▂▂▂▂▁▁▂▂▂▁▂▂▁▂▁▁▂▂▁▁▁▁▁▂▁▁▂▁
trainer/global_step,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███

0,1
epoch,250.0
test_acc,0.68267
train_loss,0.82284
trainer/global_step,4750.0


[34m[1mwandb[0m: Agent Starting Run: gbu2gnpp with config:
[34m[1mwandb[0m: 	batch_size: 12
[34m[1mwandb[0m: 	batches_per_epoch: 19
[34m[1mwandb[0m: 	lr: 2.5033379281498443e-05
[34m[1mwandb[0m: 	num_epochs: 250
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


  rank_zero_warn(
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
  rank_zero_warn(
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name  | Type | Params
-------------------------------
0 | model | BERT | 109 M 
-------------------------------
109 M     Trainable params
0         Non-trainable params
109 M     Total params
436.397   Total estimated model params size (MB)


Fine tuning BERT...


  rank_zero_warn(


Training: 0it [00:00, ?it/s]

`Trainer.fit` stopped: `max_epochs=250` reached.


Testing on ETHICS...


LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
  rank_zero_warn(


Testing: 0it [00:00, ?it/s]



0,1
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███
test_acc,▁
train_loss,█▅▅▄▃▃▂▂▄▂▂▂▃▂▂▂▁▂▃▂▂▂▂▂▁▂▁▁▂▃▂▁▁▂▁▂▁▁▂▂
trainer/global_step,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███

0,1
epoch,250.0
test_acc,0.67067
train_loss,0.82296
trainer/global_step,4750.0


[34m[1mwandb[0m: Agent Starting Run: 99n9lazo with config:
[34m[1mwandb[0m: 	batch_size: 12
[34m[1mwandb[0m: 	batches_per_epoch: 19
[34m[1mwandb[0m: 	lr: 4.6060154632384326e-05
[34m[1mwandb[0m: 	num_epochs: 250
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


  rank_zero_warn(
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
  rank_zero_warn(
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name  | Type | Params
-------------------------------
0 | model | BERT | 109 M 
-------------------------------
109 M     Trainable params
0         Non-trainable params
109 M     Total params
436.397   Total estimated model params size (MB)


Fine tuning BERT...


  rank_zero_warn(


Training: 0it [00:00, ?it/s]

`Trainer.fit` stopped: `max_epochs=250` reached.


Testing on ETHICS...


LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
  rank_zero_warn(


Testing: 0it [00:00, ?it/s]



0,1
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███
test_acc,▁
train_loss,▆█▅▄▃▃▂▂▃▂▂▂▂▂▂▂▁▁▃▂▂▁▂▂▁▂▁▁▂▂▂▁▁▂▁▂▁▁▂▂
trainer/global_step,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███

0,1
epoch,250.0
test_acc,0.67267
train_loss,0.82141
trainer/global_step,4750.0


[34m[1mwandb[0m: Agent Starting Run: 0aqlc2au with config:
[34m[1mwandb[0m: 	batch_size: 12
[34m[1mwandb[0m: 	batches_per_epoch: 19
[34m[1mwandb[0m: 	lr: 2.806187630782672e-05
[34m[1mwandb[0m: 	num_epochs: 250
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


  rank_zero_warn(
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
  rank_zero_warn(
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name  | Type | Params
-------------------------------
0 | model | BERT | 109 M 
-------------------------------
109 M     Trainable params
0         Non-trainable params
109 M     Total params
436.397   Total estimated model params size (MB)


Fine tuning BERT...


  rank_zero_warn(


Training: 0it [00:00, ?it/s]

`Trainer.fit` stopped: `max_epochs=250` reached.
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Testing on ETHICS...


  rank_zero_warn(


Testing: 0it [00:00, ?it/s]

wandb: Network error (TransientError), entering retry loop.


0,1
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███
test_acc,▁
train_loss,█▆▆▄▃▃▂▂▄▂▂▂▃▂▂▂▁▂▃▂▂▁▂▂▁▂▁▁▂▂▂▁▁▂▁▂▁▁▂▂
trainer/global_step,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███

0,1
epoch,250.0
test_acc,0.66067
train_loss,0.82555
trainer/global_step,4750.0


[34m[1mwandb[0m: Agent Starting Run: q9te5cbc with config:
[34m[1mwandb[0m: 	batch_size: 12
[34m[1mwandb[0m: 	batches_per_epoch: 19
[34m[1mwandb[0m: 	lr: 4.233571951500354e-05
[34m[1mwandb[0m: 	num_epochs: 250
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.
[34m[1mwandb[0m: W&B API key is configured. Use [1m`wandb login --relogin`[0m to force relogin


VBox(children=(Label(value='Waiting for wandb.init()...\r'), FloatProgress(value=0.01666870273281044, max=1.0)…

[34m[1mwandb[0m: Network error (ConnectionError), entering retry loop.
wandb: Network error (ConnectionError), entering retry loop.
[34m[1mwandb[0m: [32m[41mERROR[0m Run q9te5cbc errored: CommError('Run initialization has timed out after 60.0 sec. \nPlease refer to the documentation for additional information: https://docs.wandb.ai/guides/track/tracking-faq#initstarterror-error-communicating-with-wandb-process-')


Problem at: /tmp/ipykernel_128754/1656914270.py 3 wandb_sweep


[34m[1mwandb[0m: Agent Starting Run: wib573lz with config:
[34m[1mwandb[0m: 	batch_size: 12
[34m[1mwandb[0m: 	batches_per_epoch: 19
[34m[1mwandb[0m: 	lr: 1.6091807738077005e-05
[34m[1mwandb[0m: 	num_epochs: 250


VBox(children=(Label(value='Waiting for wandb.init()...\r'), FloatProgress(value=0.016669353983403804, max=1.0…

  rank_zero_warn(
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
  rank_zero_warn(
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name  | Type | Params
-------------------------------
0 | model | BERT | 109 M 
-------------------------------
109 M     Trainable params
0         Non-trainable params
109 M     Total params
436.397   Total estimated model params size (MB)


Fine tuning BERT...


  rank_zero_warn(


Training: 0it [00:00, ?it/s]

wandb: Network error (ConnectionError), entering retry loop.
wandb: Network error (TransientError), entering retry loop.
`Trainer.fit` stopped: `max_epochs=250` reached.
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Testing on ETHICS...


  rank_zero_warn(


Testing: 0it [00:00, ?it/s]



0,1
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███
test_acc,▁
train_loss,██▅▅▃▄▃▃▅▃▃▃▄▃▂▃▂▂▃▂▂▂▅▂▂▂▁▂▃▂▂▂▁▂▂▂▁▂▃▂
trainer/global_step,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███

0,1
epoch,250.0
test_acc,0.69067
train_loss,0.83644
trainer/global_step,4750.0


[34m[1mwandb[0m: Agent Starting Run: 1qmonj2o with config:
[34m[1mwandb[0m: 	batch_size: 12
[34m[1mwandb[0m: 	batches_per_epoch: 19
[34m[1mwandb[0m: 	lr: 1.2123088487137248e-05
[34m[1mwandb[0m: 	num_epochs: 250


  rank_zero_warn(
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
  rank_zero_warn(
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name  | Type | Params
-------------------------------
0 | model | BERT | 109 M 
-------------------------------
109 M     Trainable params
0         Non-trainable params
109 M     Total params
436.397   Total estimated model params size (MB)


Fine tuning BERT...


  rank_zero_warn(


Training: 0it [00:00, ?it/s]

`Trainer.fit` stopped: `max_epochs=250` reached.


Testing on ETHICS...


LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
  rank_zero_warn(


Testing: 0it [00:00, ?it/s]



0,1
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███
test_acc,▁
train_loss,█▇▅▄▃▃▃▃▅▃▃▃▄▃▃▃▂▂▃▂▂▂▃▂▂▂▁▂▃▂▂▂▁▂▂▂▁▂▃▂
trainer/global_step,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███

0,1
epoch,250.0
test_acc,0.69333
train_loss,0.85355
trainer/global_step,4750.0


[34m[1mwandb[0m: Agent Starting Run: q9te5cbc with config:
[34m[1mwandb[0m: 	batch_size: 12
[34m[1mwandb[0m: 	batches_per_epoch: 19
[34m[1mwandb[0m: 	lr: 4.233571951500354e-05
[34m[1mwandb[0m: 	num_epochs: 250


  rank_zero_warn(
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
  rank_zero_warn(
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name  | Type | Params
-------------------------------
0 | model | BERT | 109 M 
-------------------------------
109 M     Trainable params
0         Non-trainable params
109 M     Total params
436.397   Total estimated model params size (MB)
  rank_zero_warn(


Fine tuning BERT...


Training: 0it [00:00, ?it/s]

`Trainer.fit` stopped: `max_epochs=250` reached.
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Testing on ETHICS...


  rank_zero_warn(


Testing: 0it [00:00, ?it/s]



0,1
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███
test_acc,▁
train_loss,█▆▅▄▃▆▂▂▃▂▂▂▃▂▂▂▁▂▃▂▂▂▃▂▂▂▁▂▃▂▂▂▁▂▁▂▁▂▃▂
trainer/global_step,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███

0,1
epoch,250.0
test_acc,0.65667
train_loss,0.82108
trainer/global_step,4750.0


[34m[1mwandb[0m: Agent Starting Run: himzbiai with config:
[34m[1mwandb[0m: 	batch_size: 12
[34m[1mwandb[0m: 	batches_per_epoch: 19
[34m[1mwandb[0m: 	lr: 2.6989323767267457e-05
[34m[1mwandb[0m: 	num_epochs: 250


  rank_zero_warn(
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
  rank_zero_warn(
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name  | Type | Params
-------------------------------
0 | model | BERT | 109 M 
-------------------------------
109 M     Trainable params
0         Non-trainable params
109 M     Total params
436.397   Total estimated model params size (MB)


Fine tuning BERT...


  rank_zero_warn(


Training: 0it [00:00, ?it/s]

`Trainer.fit` stopped: `max_epochs=250` reached.


Testing on ETHICS...


LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
  rank_zero_warn(


Testing: 0it [00:00, ?it/s]



VBox(children=(Label(value='0.005 MB of 0.005 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███
test_acc,▁
train_loss,█▇▆▅▃▄▂▂▄▃▃▂▃▃▂▂▁▂▃▂▂▂▃▂▁▂▁▂▃▂▂▂▁▂▁▂▁▁▃▂
trainer/global_step,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███

0,1
epoch,250.0
test_acc,0.68333
train_loss,0.82586
trainer/global_step,4750.0


[34m[1mwandb[0m: Agent Starting Run: 6b61nk67 with config:
[34m[1mwandb[0m: 	batch_size: 12
[34m[1mwandb[0m: 	batches_per_epoch: 19
[34m[1mwandb[0m: 	lr: 1.5916577041984455e-05
[34m[1mwandb[0m: 	num_epochs: 250


  rank_zero_warn(
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
  rank_zero_warn(
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name  | Type | Params
-------------------------------
0 | model | BERT | 109 M 
-------------------------------
109 M     Trainable params
0         Non-trainable params
109 M     Total params
436.397   Total estimated model params size (MB)
  rank_zero_warn(


Fine tuning BERT...


Training: 0it [00:00, ?it/s]

`Trainer.fit` stopped: `max_epochs=250` reached.
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Testing on ETHICS...


  rank_zero_warn(


Testing: 0it [00:00, ?it/s]



0,1
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███
test_acc,▁
train_loss,▇█▇▅▃▄▄▃▅▃▃▃▄▃▂▃▂▂▃▂▂▃▃▂▂▂▁▂▃▂▂▂▁▂▂▂▁▂▃▂
trainer/global_step,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███

0,1
epoch,250.0
test_acc,0.682
train_loss,0.84027
trainer/global_step,4750.0


[34m[1mwandb[0m: Agent Starting Run: 5yvzor81 with config:
[34m[1mwandb[0m: 	batch_size: 12
[34m[1mwandb[0m: 	batches_per_epoch: 19
[34m[1mwandb[0m: 	lr: 4.39394196454544e-05
[34m[1mwandb[0m: 	num_epochs: 250


  rank_zero_warn(
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
  rank_zero_warn(
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name  | Type | Params
-------------------------------
0 | model | BERT | 109 M 
-------------------------------
109 M     Trainable params
0         Non-trainable params
109 M     Total params
436.397   Total estimated model params size (MB)


Fine tuning BERT...


  rank_zero_warn(


Training: 0it [00:00, ?it/s]

`Trainer.fit` stopped: `max_epochs=250` reached.
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Testing on ETHICS...


  rank_zero_warn(


Testing: 0it [00:00, ?it/s]



0,1
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███
test_acc,▁
train_loss,▇▇▅█▃▃▂▂▃▂▂▂▃▂▂▂▁▂▄▂▂▂▂▂▁▂▁▁▃▂▂▁▁▂▁▂▁▁▃▂
trainer/global_step,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███

0,1
epoch,250.0
test_acc,0.67467
train_loss,0.81828
trainer/global_step,4750.0


[34m[1mwandb[0m: Agent Starting Run: ftkr6sz0 with config:
[34m[1mwandb[0m: 	batch_size: 12
[34m[1mwandb[0m: 	batches_per_epoch: 19
[34m[1mwandb[0m: 	lr: 3.1073457827795395e-05
[34m[1mwandb[0m: 	num_epochs: 250


  rank_zero_warn(
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
  rank_zero_warn(
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name  | Type | Params
-------------------------------
0 | model | BERT | 109 M 
-------------------------------
109 M     Trainable params
0         Non-trainable params
109 M     Total params
436.397   Total estimated model params size (MB)


Fine tuning BERT...


  rank_zero_warn(


Training: 0it [00:00, ?it/s]

`Trainer.fit` stopped: `max_epochs=250` reached.


Testing on ETHICS...


LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
  rank_zero_warn(


Testing: 0it [00:00, ?it/s]



0,1
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███
test_acc,▁
train_loss,█▅▆▄▃▃▂▂▄▂▂▂▃▂▂▂▁▂▃▂▂▂▂▂▁▂▁▁▂▂▂▁▁▂▁▂▁▁▂▂
trainer/global_step,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███

0,1
epoch,250.0
test_acc,0.68067
train_loss,0.82345
trainer/global_step,4750.0


[34m[1mwandb[0m: Agent Starting Run: ereiujmv with config:
[34m[1mwandb[0m: 	batch_size: 12
[34m[1mwandb[0m: 	batches_per_epoch: 19
[34m[1mwandb[0m: 	lr: 3.5428581517834246e-05
[34m[1mwandb[0m: 	num_epochs: 250


  rank_zero_warn(
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
  rank_zero_warn(
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name  | Type | Params
-------------------------------
0 | model | BERT | 109 M 
-------------------------------
109 M     Trainable params
0         Non-trainable params
109 M     Total params
436.397   Total estimated model params size (MB)


Fine tuning BERT...


  rank_zero_warn(


Training: 0it [00:00, ?it/s]

`Trainer.fit` stopped: `max_epochs=250` reached.
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Testing on ETHICS...


  rank_zero_warn(


Testing: 0it [00:00, ?it/s]



0,1
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███
test_acc,▁
train_loss,▅█▄▃▃▂▂▂▃▂▂▂▂▂▁▂▁▁▂▁▂▁▂▂▁▂▁▁▂▁▁▁▁▁▁▂▁▁▂▁
trainer/global_step,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███

0,1
epoch,250.0
test_acc,0.67067
train_loss,0.82042
trainer/global_step,4750.0


[34m[1mwandb[0m: Agent Starting Run: pdu6x24k with config:
[34m[1mwandb[0m: 	batch_size: 12
[34m[1mwandb[0m: 	batches_per_epoch: 19
[34m[1mwandb[0m: 	lr: 1.855180136844347e-05
[34m[1mwandb[0m: 	num_epochs: 250


  rank_zero_warn(
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
  rank_zero_warn(
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name  | Type | Params
-------------------------------
0 | model | BERT | 109 M 
-------------------------------
109 M     Trainable params
0         Non-trainable params
109 M     Total params
436.397   Total estimated model params size (MB)


Fine tuning BERT...


  rank_zero_warn(


Training: 0it [00:00, ?it/s]

`Trainer.fit` stopped: `max_epochs=250` reached.
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Testing on ETHICS...


  rank_zero_warn(


Testing: 0it [00:00, ?it/s]



0,1
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███
test_acc,▁
train_loss,▇█▅▅▄▄▃▃▅▃▃▄▄▃▂▂▁▂▃▂▂▂▃▂▂▂▁▂▃▂▂▂▁▂▃▂▁▂▃▂
trainer/global_step,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███

0,1
epoch,250.0
test_acc,0.68667
train_loss,0.83054
trainer/global_step,4750.0


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: 67u69l68 with config:
[34m[1mwandb[0m: 	batch_size: 12
[34m[1mwandb[0m: 	batches_per_epoch: 19
[34m[1mwandb[0m: 	lr: 1.3789441869133273e-05
[34m[1mwandb[0m: 	num_epochs: 250


  rank_zero_warn(
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
  rank_zero_warn(
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name  | Type | Params
-------------------------------
0 | model | BERT | 109 M 
-------------------------------
109 M     Trainable params
0         Non-trainable params
109 M     Total params
436.397   Total estimated model params size (MB)


Fine tuning BERT...


  rank_zero_warn(


Training: 0it [00:00, ?it/s]

`Trainer.fit` stopped: `max_epochs=250` reached.
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Testing on ETHICS...


  rank_zero_warn(


Testing: 0it [00:00, ?it/s]



0,1
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███
test_acc,▁
train_loss,█▅▄▄▃▃▂▃▄▃▃▃▄▃▂▂▂▂▃▂▂▂▃▂▂▂▁▂▃▂▂▂▁▂▁▂▁▁▂▂
trainer/global_step,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███

0,1
epoch,250.0
test_acc,0.68467
train_loss,0.84266
trainer/global_step,4750.0


[34m[1mwandb[0m: Agent Starting Run: qdvxec0q with config:
[34m[1mwandb[0m: 	batch_size: 12
[34m[1mwandb[0m: 	batches_per_epoch: 19
[34m[1mwandb[0m: 	lr: 4.027096007386502e-05
[34m[1mwandb[0m: 	num_epochs: 250


  rank_zero_warn(
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
  rank_zero_warn(
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name  | Type | Params
-------------------------------
0 | model | BERT | 109 M 
-------------------------------
109 M     Trainable params
0         Non-trainable params
109 M     Total params
436.397   Total estimated model params size (MB)


Fine tuning BERT...


  rank_zero_warn(


Training: 0it [00:00, ?it/s]

`Trainer.fit` stopped: `max_epochs=250` reached.


Testing on ETHICS...


LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
  rank_zero_warn(


Testing: 0it [00:00, ?it/s]



0,1
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███
test_acc,▁
train_loss,▆▇█▅▃▄▂▂▄▂▂▂▃▂▂▂▁▂▃▂▂▂▃▂▁▂▁▂▃▂▂▂▁▂▁▂▁▂▃▂
trainer/global_step,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███

0,1
epoch,250.0
test_acc,0.678
train_loss,0.8177
trainer/global_step,4750.0


[34m[1mwandb[0m: Agent Starting Run: zkjcycas with config:
[34m[1mwandb[0m: 	batch_size: 12
[34m[1mwandb[0m: 	batches_per_epoch: 19
[34m[1mwandb[0m: 	lr: 2.5918297949103125e-05
[34m[1mwandb[0m: 	num_epochs: 250


  rank_zero_warn(
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
  rank_zero_warn(
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name  | Type | Params
-------------------------------
0 | model | BERT | 109 M 
-------------------------------
109 M     Trainable params
0         Non-trainable params
109 M     Total params
436.397   Total estimated model params size (MB)


Fine tuning BERT...


  rank_zero_warn(


Training: 0it [00:00, ?it/s]

`Trainer.fit` stopped: `max_epochs=250` reached.


Testing on ETHICS...


LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
  rank_zero_warn(


Testing: 0it [00:00, ?it/s]



0,1
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███
test_acc,▁
train_loss,▆█▅▅▃▃▂▂▄▃▃▂▃▃▂▂▁▂▃▂▂▂▃▂▂▂▁▁▃▂▂▁▁▂▁▂▁▂▃▂
trainer/global_step,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███

0,1
epoch,250.0
test_acc,0.68067
train_loss,0.82285
trainer/global_step,4750.0


[34m[1mwandb[0m: Agent Starting Run: svh0fk6t with config:
[34m[1mwandb[0m: 	batch_size: 12
[34m[1mwandb[0m: 	batches_per_epoch: 19
[34m[1mwandb[0m: 	lr: 4.686856365426256e-05
[34m[1mwandb[0m: 	num_epochs: 250


  rank_zero_warn(
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
  rank_zero_warn(
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name  | Type | Params
-------------------------------
0 | model | BERT | 109 M 
-------------------------------
109 M     Trainable params
0         Non-trainable params
109 M     Total params
436.397   Total estimated model params size (MB)


Fine tuning BERT...


  rank_zero_warn(


Training: 0it [00:00, ?it/s]

`Trainer.fit` stopped: `max_epochs=250` reached.
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Testing on ETHICS...


  rank_zero_warn(


Testing: 0it [00:00, ?it/s]



0,1
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███
test_acc,▁
train_loss,█▄▄▄▂▂▁▂▃▂▂▂▂▂▂▂▁▁▂▂▂▁▂▂▁▂▁▁▂▂▂▁▁▁▁▂▁▁▂▂
trainer/global_step,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███

0,1
epoch,250.0
test_acc,0.67333
train_loss,0.81639
trainer/global_step,4750.0


[34m[1mwandb[0m: Agent Starting Run: 706ebmhs with config:
[34m[1mwandb[0m: 	batch_size: 12
[34m[1mwandb[0m: 	batches_per_epoch: 19
[34m[1mwandb[0m: 	lr: 4.083296934713376e-05
[34m[1mwandb[0m: 	num_epochs: 250


  rank_zero_warn(
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
  rank_zero_warn(
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name  | Type | Params
-------------------------------
0 | model | BERT | 109 M 
-------------------------------
109 M     Trainable params
0         Non-trainable params
109 M     Total params
436.397   Total estimated model params size (MB)


Fine tuning BERT...


  rank_zero_warn(


Training: 0it [00:00, ?it/s]

`Trainer.fit` stopped: `max_epochs=250` reached.
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Testing on ETHICS...


  rank_zero_warn(


Testing: 0it [00:00, ?it/s]



0,1
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███
test_acc,▁
train_loss,▇▅█▅▃▃▂▂▄▂▂▂▃▂▂▂▁▂▃▂▂▁▂▂▁▂▁▁▃▂▂▁▁▂▁▂▁▁▃▂
trainer/global_step,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███

0,1
epoch,250.0
test_acc,0.68267
train_loss,0.82053
trainer/global_step,4750.0


[34m[1mwandb[0m: Agent Starting Run: 9m08917p with config:
[34m[1mwandb[0m: 	batch_size: 12
[34m[1mwandb[0m: 	batches_per_epoch: 19
[34m[1mwandb[0m: 	lr: 4.6064245663666206e-05
[34m[1mwandb[0m: 	num_epochs: 250


  rank_zero_warn(
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
  rank_zero_warn(
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name  | Type | Params
-------------------------------
0 | model | BERT | 109 M 
-------------------------------
109 M     Trainable params
0         Non-trainable params
109 M     Total params
436.397   Total estimated model params size (MB)


Fine tuning BERT...


  rank_zero_warn(


Training: 0it [00:00, ?it/s]

`Trainer.fit` stopped: `max_epochs=250` reached.
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Testing on ETHICS...


  rank_zero_warn(


Testing: 0it [00:00, ?it/s]



VBox(children=(Label(value='0.005 MB of 0.005 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███
test_acc,▁
train_loss,█▅█▅▃▃▂▂▄▂▂▂▃▂▂▂▁▂▃▂▂▂▃▂▁▂▁▁▃▂▂▁▁▂▁▂▁▂▃▂
trainer/global_step,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███

0,1
epoch,250.0
test_acc,0.67133
train_loss,0.82095
trainer/global_step,4750.0


[34m[1mwandb[0m: Agent Starting Run: 2v011z95 with config:
[34m[1mwandb[0m: 	batch_size: 12
[34m[1mwandb[0m: 	batches_per_epoch: 19
[34m[1mwandb[0m: 	lr: 1.3636356852561404e-05
[34m[1mwandb[0m: 	num_epochs: 250


  rank_zero_warn(
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
  rank_zero_warn(
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name  | Type | Params
-------------------------------
0 | model | BERT | 109 M 
-------------------------------
109 M     Trainable params
0         Non-trainable params
109 M     Total params
436.397   Total estimated model params size (MB)


Fine tuning BERT...


  rank_zero_warn(


Training: 0it [00:00, ?it/s]

`Trainer.fit` stopped: `max_epochs=250` reached.
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Testing on ETHICS...


  rank_zero_warn(


Testing: 0it [00:00, ?it/s]



0,1
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███
test_acc,▁
train_loss,█▅▅▄▃▃▂▃▅▃▃▃▄▃▂▂▂▂▃▂▂▂▃▂▂▂▁▂▃▂▂▂▁▂▂▂▁▂▃▂
trainer/global_step,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███

0,1
epoch,250.0
test_acc,0.688
train_loss,0.84506
trainer/global_step,4750.0


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: ws9pngmg with config:
[34m[1mwandb[0m: 	batch_size: 12
[34m[1mwandb[0m: 	batches_per_epoch: 19
[34m[1mwandb[0m: 	lr: 2.8276535075222247e-05
[34m[1mwandb[0m: 	num_epochs: 250


  rank_zero_warn(
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
  rank_zero_warn(
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name  | Type | Params
-------------------------------
0 | model | BERT | 109 M 
-------------------------------
109 M     Trainable params
0         Non-trainable params
109 M     Total params
436.397   Total estimated model params size (MB)


Fine tuning BERT...


  rank_zero_warn(


Training: 0it [00:00, ?it/s]

`Trainer.fit` stopped: `max_epochs=250` reached.
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Testing on ETHICS...


  rank_zero_warn(


Testing: 0it [00:00, ?it/s]



VBox(children=(Label(value='0.005 MB of 0.005 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███
test_acc,▁
train_loss,█▅▅▄▃▃▂▂▄▂▂▂▃▂▂▂▁▂▃▂▂▂▂▂▁▂▁▁▂▂▂▁▁▂▁▂▁▁▂▂
trainer/global_step,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███

0,1
epoch,250.0
test_acc,0.688
train_loss,0.82248
trainer/global_step,4750.0


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: qp8dlvc9 with config:
[34m[1mwandb[0m: 	batch_size: 12
[34m[1mwandb[0m: 	batches_per_epoch: 19
[34m[1mwandb[0m: 	lr: 4.9089517179372296e-05
[34m[1mwandb[0m: 	num_epochs: 250


  rank_zero_warn(
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
  rank_zero_warn(
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name  | Type | Params
-------------------------------
0 | model | BERT | 109 M 
-------------------------------
109 M     Trainable params
0         Non-trainable params
109 M     Total params
436.397   Total estimated model params size (MB)


Fine tuning BERT...


  rank_zero_warn(


Training: 0it [00:00, ?it/s]