<a href="https://colab.research.google.com/github/danielsaggau/IR_LDC/blob/main/hyperparameter_tuning_classification_hf.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

This notebooks provides some hyperparameter tuning

# Set up Dataset

In [None]:
#load packages
!pip install transformers


In [20]:

from transformers import TrainerCallback 
from datasets import load_metric
import numpy as np
import torch as nn
     

In [2]:
#load data 
!pip install datasets
from datasets import load_dataset
dataset=load_dataset("lex_glue","scotus")
train_dataset=dataset['train']
train_dataset = train_dataset.shard(index=1, num_shards=10)

Generating train split:   0%|          | 0/5000 [00:00<?, ? examples/s]

Generating test split:   0%|          | 0/1400 [00:00<?, ? examples/s]

Generating validation split:   0%|          | 0/1400 [00:00<?, ? examples/s]

Dataset lex_glue downloaded and prepared to /root/.cache/huggingface/datasets/lex_glue/scotus/1.0.0/8a66420941bf6e77a7ddd4da4d3bfb7ba88ef48c1d55302a568ac650a095ca3a. Subsequent calls will reuse this data.


  0%|          | 0/3 [00:00<?, ?it/s]

In [13]:
test_dataset=dataset['test']
test_dataset = test_dataset.shard(index=1, num_shards=10)

In [15]:
eval_dataset = test_dataset.map(
      preprocess_function,
      batched=True,
      desc="tokenizing the entire dataset")

tokenizing the entire dataset:   0%|          | 0/1 [00:00<?, ?ba/s]

In [16]:
from transformers import AutoTokenizer, AutoModelForSequenceClassification
padding="max_length"

tokenizer = AutoTokenizer.from_pretrained('danielsaggau/bregman_1.5', use_fast=True)

def preprocess_function(examples):
      return tokenizer(examples["text"], truncation=True, padding=padding)

tokenized_data = train_dataset.map(
      preprocess_function,
      batched=True,
      desc="tokenizing the entire dataset")

loading file vocab.txt from cache at /root/.cache/huggingface/hub/models--danielsaggau--bregman_1.5/snapshots/363ae19253237bb845fc9861c93ac6033414e92d/vocab.txt
loading file tokenizer.json from cache at /root/.cache/huggingface/hub/models--danielsaggau--bregman_1.5/snapshots/363ae19253237bb845fc9861c93ac6033414e92d/tokenizer.json
loading file added_tokens.json from cache at None
loading file special_tokens_map.json from cache at /root/.cache/huggingface/hub/models--danielsaggau--bregman_1.5/snapshots/363ae19253237bb845fc9861c93ac6033414e92d/special_tokens_map.json
loading file tokenizer_config.json from cache at /root/.cache/huggingface/hub/models--danielsaggau--bregman_1.5/snapshots/363ae19253237bb845fc9861c93ac6033414e92d/tokenizer_config.json


# Set up Trainer 

Compute Metric Function


In [4]:
def compute_metrics(eval_pred):
  metric1 = load_metric("f1")
  logits, labels = eval_pred
  predictions = np.argmax(logits, axis=-1)
  micro1 = metric1.compute(predictions=predictions, references=labels, average="micro")["f1"]
  macro1 = metric1.compute(predictions=predictions, references=labels, average="macro")["f1"]
  return { "f1-micro": micro1, "f1-macro": macro1} 

Training Arguments

In [6]:
from transformers import TrainingArguments
training_args = TrainingArguments(
    output_dir='scotus_max_linear',
    learning_rate=3e-5,
    per_device_train_batch_size=6,
    per_device_eval_batch_size=6,
    num_train_epochs=10,
    weight_decay=0.01,
    save_strategy="epoch",
    evaluation_strategy="epoch",
 #   push_to_hub=True,
    metric_for_best_model="f1-micro",
    fp16=True,
#    report_to="wandb",
    greater_is_better=True,
    lr_scheduler_type='linear',
 #   run_name="max",
    load_best_model_at_end = True
)

In [7]:
from transformers import DataCollatorWithPadding
data_collator = DataCollatorWithPadding(tokenizer, pad_to_multiple_of=8) # fp16

In [8]:
def model_init():
    return AutoModelForSequenceClassification.from_pretrained('danielsaggau/bregman_1.5', num_labels=14)


In [17]:
from transformers import Trainer, EarlyStoppingCallback
trainer = Trainer(
    compute_metrics=compute_metrics,
    args=training_args,
    train_dataset=tokenized_data,
    eval_dataset=eval_dataset,
    tokenizer=tokenizer,
    data_collator=data_collator,    
    model_init=model_init,
    callbacks = [EarlyStoppingCallback(early_stopping_patience=5)]
      )

loading configuration file config.json from cache at /root/.cache/huggingface/hub/models--danielsaggau--bregman_1.5/snapshots/363ae19253237bb845fc9861c93ac6033414e92d/config.json
Model config LongformerConfig {
  "_name_or_path": "danielsaggau/bregman_1.5",
  "architectures": [
    "LongformerModel"
  ],
  "attention_mode": "longformer",
  "attention_probs_dropout_prob": 0.1,
  "attention_window": [
    128,
    128,
    128,
    128,
    128,
    128
  ],
  "bos_token_id": 0,
  "classifier_dropout": null,
  "cls_token_id": 101,
  "eos_token_id": null,
  "gradient_checkpointing": false,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 512,
  "id2label": {
    "0": "LABEL_0",
    "1": "LABEL_1",
    "2": "LABEL_2",
    "3": "LABEL_3",
    "4": "LABEL_4",
    "5": "LABEL_5",
    "6": "LABEL_6",
    "7": "LABEL_7",
    "8": "LABEL_8",
    "9": "LABEL_9",
    "10": "LABEL_10",
    "11": "LABEL_11",
    "12": "LABEL_12",
    "13": "LABEL_13"
  },
  "ignore_attention_ma

# Define hyperparameter space 

In [26]:
def hp_space(trial):
    return {
        "learning_rate": trial.suggest_float("learning_rate", 1e-5, 1e-3, log=True),
        "weight_decay": trial.suggest_float("weight_decay", 0.01,0.05),
        "per_device_train_batch_size": trial.suggest_categorical("per_device_train_batch_size", [2,3,4,6,8]),
        "num_train_epochs": trial.suggest_categorical("num_train_epochs", [4,5,6,7, 10,15]),
        "lr_scheduler_type": trial.suggest_categorical("lr_scheduler_type", ['linear', 'cosine', 'polynomial','constant', 'constant_with_warmup'])
    }

In [None]:
#hide_output
!pip install optuna
!pip install wandb
import wandb
wandb_kwargs = {"project": "hyperparameters"}

In [27]:
wandb.init(project="hyperparameters",name="short_run")

ERROR:wandb.jupyter:Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


<IPython.core.display.Javascript object>

[34m[1mwandb[0m: Logging into wandb.ai. (Learn how to deploy a W&B server locally: https://wandb.me/wandb-server)
[34m[1mwandb[0m: You can find your API key in your browser here: https://wandb.ai/authorize
wandb: Paste an API key from your profile and hit enter, or press ctrl+c to quit: 

··········


[34m[1mwandb[0m: Appending key for api.wandb.ai to your netrc file: /root/.netrc


In [None]:
!pip3 install wandb
import optuna
!pip install shortuuid==1.0.1
import wandb
from optuna.integration.wandb import WeightsAndBiasesCallback
wandb_kwargs = {"project": "hyperparameters"}
wandbc = WeightsAndBiasesCallback(wandb_kwargs=wandb_kwargs)

In [36]:
best_run = trainer.hyperparameter_search(
    n_trials=15, direction="maximize", hp_space=hp_space)

[32m[I 2022-12-12 09:00:49,138][0m A new study created in memory with name: no-name-f198c3e0-e7dd-450a-9a8e-b1d028794013[0m
Trial: {'learning_rate': 3.6107212281458205e-05, 'weight_decay': 0.026672900647824342, 'per_device_train_batch_size': 6, 'num_train_epochs': 7, 'lr_scheduler_type': 'cosine'}
loading configuration file config.json from cache at /root/.cache/huggingface/hub/models--danielsaggau--bregman_1.5/snapshots/363ae19253237bb845fc9861c93ac6033414e92d/config.json
Model config LongformerConfig {
  "_name_or_path": "danielsaggau/bregman_1.5",
  "architectures": [
    "LongformerModel"
  ],
  "attention_mode": "longformer",
  "attention_probs_dropout_prob": 0.1,
  "attention_window": [
    128,
    128,
    128,
    128,
    128,
    128
  ],
  "bos_token_id": 0,
  "classifier_dropout": null,
  "cls_token_id": 101,
  "eos_token_id": null,
  "gradient_checkpointing": false,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 512,
  "id2label": {
    "0": "LA

Epoch,Training Loss,Validation Loss,F1-micro,F1-macro
1,No log,1.530931,0.585714,0.204468
2,No log,1.300048,0.614286,0.265071
3,No log,1.189087,0.628571,0.304178
4,No log,1.164882,0.671429,0.332559
5,No log,1.197506,0.671429,0.332186
6,1.021600,1.177126,0.678571,0.373905
7,1.021600,1.184705,0.685714,0.376037


Initializing global attention on CLS token...
Initializing global attention on CLS token...
Initializing global attention on CLS token...
Initializing global attention on CLS token...
Initializing global attention on CLS token...
Initializing global attention on CLS token...
Initializing global attention on CLS token...
Initializing global attention on CLS token...
Initializing global attention on CLS token...
Initializing global attention on CLS token...
Initializing global attention on CLS token...
Initializing global attention on CLS token...
Initializing global attention on CLS token...
Initializing global attention on CLS token...
Initializing global attention on CLS token...
Initializing global attention on CLS token...
Initializing global attention on CLS token...
Initializing global attention on CLS token...
Initializing global attention on CLS token...
Initializing global attention on CLS token...
Initializing global attention on CLS token...
Initializing global attention on C

Epoch,Training Loss,Validation Loss,F1-micro,F1-macro
1,No log,1.482125,0.564286,0.183688
2,No log,1.290993,0.621429,0.299403
3,1.442200,1.202354,0.657143,0.33036
4,1.442200,1.294336,0.65,0.356255


Initializing global attention on CLS token...
Initializing global attention on CLS token...
Initializing global attention on CLS token...
Initializing global attention on CLS token...
Initializing global attention on CLS token...
Initializing global attention on CLS token...
Initializing global attention on CLS token...
Initializing global attention on CLS token...
Initializing global attention on CLS token...
Initializing global attention on CLS token...
Initializing global attention on CLS token...
Initializing global attention on CLS token...
Initializing global attention on CLS token...
Initializing global attention on CLS token...
Initializing global attention on CLS token...
Initializing global attention on CLS token...
Initializing global attention on CLS token...
Initializing global attention on CLS token...
Initializing global attention on CLS token...
Initializing global attention on CLS token...
Initializing global attention on CLS token...
Initializing global attention on C

Epoch,Training Loss,Validation Loss,F1-micro,F1-macro
1,No log,1.942724,0.45,0.113813
2,No log,1.644531,0.528571,0.174646
3,No log,1.473308,0.614286,0.260807
4,No log,1.360925,0.621429,0.273482
5,No log,1.280127,0.678571,0.296961
6,1.579800,1.234453,0.678571,0.323276
7,1.579800,1.219307,0.635714,0.294854
8,1.579800,1.193324,0.642857,0.299283
9,1.579800,1.198131,0.65,0.322689
10,1.579800,1.190861,0.657143,0.328529


Initializing global attention on CLS token...
Initializing global attention on CLS token...
Initializing global attention on CLS token...
Initializing global attention on CLS token...
Initializing global attention on CLS token...
Initializing global attention on CLS token...
Initializing global attention on CLS token...
Initializing global attention on CLS token...
Initializing global attention on CLS token...
Initializing global attention on CLS token...
Initializing global attention on CLS token...
Initializing global attention on CLS token...
Initializing global attention on CLS token...
Initializing global attention on CLS token...
Initializing global attention on CLS token...
Initializing global attention on CLS token...
Initializing global attention on CLS token...
Initializing global attention on CLS token...
Initializing global attention on CLS token...
Initializing global attention on CLS token...
Initializing global attention on CLS token...
Initializing global attention on C

Epoch,Training Loss,Validation Loss,F1-micro,F1-macro
1,No log,1.380272,0.6,0.208101
2,No log,1.345717,0.571429,0.284519
3,No log,1.274755,0.657143,0.363149
4,No log,1.182037,0.685714,0.459264
5,No log,1.588723,0.635714,0.386589
6,0.827200,1.923206,0.628571,0.352489
7,0.827200,1.848,0.642857,0.404662
8,0.827200,1.907236,0.657143,0.430701
9,0.827200,2.293891,0.614286,0.389957


Initializing global attention on CLS token...
Initializing global attention on CLS token...
Initializing global attention on CLS token...
Initializing global attention on CLS token...
Initializing global attention on CLS token...
Initializing global attention on CLS token...
Initializing global attention on CLS token...
Initializing global attention on CLS token...
Initializing global attention on CLS token...
Initializing global attention on CLS token...
Initializing global attention on CLS token...
Initializing global attention on CLS token...
Initializing global attention on CLS token...
Initializing global attention on CLS token...
Initializing global attention on CLS token...
Initializing global attention on CLS token...
Initializing global attention on CLS token...
Initializing global attention on CLS token...
Initializing global attention on CLS token...
Initializing global attention on CLS token...
Initializing global attention on CLS token...
Initializing global attention on C

Epoch,Training Loss,Validation Loss,F1-micro,F1-macro
1,No log,1.410125,0.592857,0.205222
2,No log,1.323292,0.607143,0.286314
3,No log,1.174889,0.65,0.32496
4,No log,1.19177,0.671429,0.377162
5,No log,1.244882,0.671429,0.370238
6,0.875600,1.230967,0.678571,0.378502
7,0.875600,1.236089,0.678571,0.379854


Initializing global attention on CLS token...
Initializing global attention on CLS token...
Initializing global attention on CLS token...
Initializing global attention on CLS token...
Initializing global attention on CLS token...
Initializing global attention on CLS token...
Initializing global attention on CLS token...
Initializing global attention on CLS token...
Initializing global attention on CLS token...
Initializing global attention on CLS token...
Initializing global attention on CLS token...
Initializing global attention on CLS token...
Initializing global attention on CLS token...
Initializing global attention on CLS token...
Initializing global attention on CLS token...
Initializing global attention on CLS token...
Initializing global attention on CLS token...
Initializing global attention on CLS token...
Initializing global attention on CLS token...
Initializing global attention on CLS token...
Initializing global attention on CLS token...
Initializing global attention on C

Epoch,Training Loss,Validation Loss,F1-micro,F1-macro
1,No log,2.127093,0.135714,0.019916
2,No log,2.34067,0.15,0.021739
3,No log,2.051695,0.314286,0.039855
4,2.296900,2.131682,0.314286,0.039855
5,2.296900,2.117508,0.135714,0.019916


Initializing global attention on CLS token...
Initializing global attention on CLS token...
Initializing global attention on CLS token...
Initializing global attention on CLS token...
Initializing global attention on CLS token...
Initializing global attention on CLS token...
Initializing global attention on CLS token...
Initializing global attention on CLS token...
Initializing global attention on CLS token...
Initializing global attention on CLS token...
Initializing global attention on CLS token...
Initializing global attention on CLS token...
Initializing global attention on CLS token...
Initializing global attention on CLS token...
Initializing global attention on CLS token...
Initializing global attention on CLS token...
Initializing global attention on CLS token...
Initializing global attention on CLS token...
Initializing global attention on CLS token...
Initializing global attention on CLS token...
Initializing global attention on CLS token...
Initializing global attention on C

Epoch,Training Loss,Validation Loss,F1-micro,F1-macro
1,No log,1.487922,0.592857,0.207953
2,No log,1.326248,0.592857,0.258489


Initializing global attention on CLS token...
Initializing global attention on CLS token...
Initializing global attention on CLS token...
Initializing global attention on CLS token...
Initializing global attention on CLS token...
Initializing global attention on CLS token...
Initializing global attention on CLS token...
Initializing global attention on CLS token...
Initializing global attention on CLS token...
Initializing global attention on CLS token...
Initializing global attention on CLS token...
Initializing global attention on CLS token...
Initializing global attention on CLS token...
Initializing global attention on CLS token...
Initializing global attention on CLS token...
Initializing global attention on CLS token...
Initializing global attention on CLS token...
Initializing global attention on CLS token...
Initializing global attention on CLS token...
Initializing global attention on CLS token...
Initializing global attention on CLS token...
Initializing global attention on C

Epoch,Training Loss,Validation Loss,F1-micro,F1-macro
1,No log,1.419182,0.621429,0.269489
2,No log,1.441407,0.528571,0.259981
3,No log,1.421885,0.578571,0.237446
4,No log,1.415152,0.65,0.373378
5,No log,1.600371,0.628571,0.363704
6,No log,1.877621,0.592857,0.299391
7,No log,2.07762,0.557143,0.412049


Initializing global attention on CLS token...
Initializing global attention on CLS token...
Initializing global attention on CLS token...
Initializing global attention on CLS token...
Initializing global attention on CLS token...
Initializing global attention on CLS token...
Initializing global attention on CLS token...
Initializing global attention on CLS token...
Initializing global attention on CLS token...
Initializing global attention on CLS token...
Initializing global attention on CLS token...
Initializing global attention on CLS token...
Initializing global attention on CLS token...
Initializing global attention on CLS token...
Initializing global attention on CLS token...
Initializing global attention on CLS token...
Initializing global attention on CLS token...
Initializing global attention on CLS token...
Initializing global attention on CLS token...
Initializing global attention on CLS token...
Initializing global attention on CLS token...
Initializing global attention on C

Epoch,Training Loss,Validation Loss,F1-micro,F1-macro
1,No log,1.867848,0.442857,0.115692


Initializing global attention on CLS token...
Initializing global attention on CLS token...
Initializing global attention on CLS token...
Initializing global attention on CLS token...
Initializing global attention on CLS token...
Initializing global attention on CLS token...
Initializing global attention on CLS token...
Initializing global attention on CLS token...
Initializing global attention on CLS token...
Initializing global attention on CLS token...
Initializing global attention on CLS token...
Initializing global attention on CLS token...
Initializing global attention on CLS token...
Initializing global attention on CLS token...
Initializing global attention on CLS token...
Initializing global attention on CLS token...
Initializing global attention on CLS token...
Initializing global attention on CLS token...
Initializing global attention on CLS token...
Initializing global attention on CLS token...
Initializing global attention on CLS token...
Initializing global attention on C

Epoch,Training Loss,Validation Loss,F1-micro,F1-macro
1,No log,1.750624,0.535714,0.175888
2,1.927300,1.468527,0.585714,0.222808
3,1.927300,1.293567,0.628571,0.27066
4,1.238400,1.207684,0.671429,0.294242
5,1.238400,1.199306,0.664286,0.310634
6,0.908800,1.199649,0.685714,0.341555
7,0.908800,1.20496,0.671429,0.333014


Initializing global attention on CLS token...
Initializing global attention on CLS token...
Initializing global attention on CLS token...
Initializing global attention on CLS token...
Initializing global attention on CLS token...
Initializing global attention on CLS token...
Initializing global attention on CLS token...
Initializing global attention on CLS token...
Initializing global attention on CLS token...
Initializing global attention on CLS token...
Initializing global attention on CLS token...
Initializing global attention on CLS token...
Initializing global attention on CLS token...
Initializing global attention on CLS token...
Initializing global attention on CLS token...
Initializing global attention on CLS token...
Initializing global attention on CLS token...
Initializing global attention on CLS token...
Initializing global attention on CLS token...
Initializing global attention on CLS token...
Initializing global attention on CLS token...
Initializing global attention on C

Epoch,Training Loss,Validation Loss,F1-micro,F1-macro
1,No log,2.107275,0.314286,0.039855


Initializing global attention on CLS token...
Initializing global attention on CLS token...
Initializing global attention on CLS token...
Initializing global attention on CLS token...
Initializing global attention on CLS token...
Initializing global attention on CLS token...
Initializing global attention on CLS token...
Initializing global attention on CLS token...
Initializing global attention on CLS token...
Initializing global attention on CLS token...
Initializing global attention on CLS token...
Initializing global attention on CLS token...
Initializing global attention on CLS token...
Initializing global attention on CLS token...
Initializing global attention on CLS token...
Initializing global attention on CLS token...
Initializing global attention on CLS token...
Initializing global attention on CLS token...
Initializing global attention on CLS token...
Initializing global attention on CLS token...
Initializing global attention on CLS token...
Initializing global attention on C

Epoch,Training Loss,Validation Loss,F1-micro,F1-macro
1,No log,1.505071,0.557143,0.180962
2,No log,1.274806,0.664286,0.290269
3,1.484600,1.210081,0.664286,0.330163
4,1.484600,1.216039,0.664286,0.331175


Initializing global attention on CLS token...
Initializing global attention on CLS token...
Initializing global attention on CLS token...
Initializing global attention on CLS token...
Initializing global attention on CLS token...
Initializing global attention on CLS token...
Initializing global attention on CLS token...
Initializing global attention on CLS token...
Initializing global attention on CLS token...
Initializing global attention on CLS token...
Initializing global attention on CLS token...
Initializing global attention on CLS token...
Initializing global attention on CLS token...
Initializing global attention on CLS token...
Initializing global attention on CLS token...
Initializing global attention on CLS token...
Initializing global attention on CLS token...
Initializing global attention on CLS token...
Initializing global attention on CLS token...
Initializing global attention on CLS token...
Initializing global attention on CLS token...
Initializing global attention on C

Epoch,Training Loss,Validation Loss,F1-micro,F1-macro
1,No log,1.443396,0.55,0.179336


Initializing global attention on CLS token...
Initializing global attention on CLS token...
Initializing global attention on CLS token...
Initializing global attention on CLS token...
Initializing global attention on CLS token...
Initializing global attention on CLS token...
Initializing global attention on CLS token...
Initializing global attention on CLS token...
Initializing global attention on CLS token...
Initializing global attention on CLS token...
Initializing global attention on CLS token...
Initializing global attention on CLS token...
Initializing global attention on CLS token...
Initializing global attention on CLS token...
Initializing global attention on CLS token...
Initializing global attention on CLS token...
Initializing global attention on CLS token...
Initializing global attention on CLS token...
Initializing global attention on CLS token...
Initializing global attention on CLS token...
Initializing global attention on CLS token...
Initializing global attention on C

Epoch,Training Loss,Validation Loss,F1-micro,F1-macro
1,No log,1.713693,0.542857,0.176394


Initializing global attention on CLS token...
Initializing global attention on CLS token...
Initializing global attention on CLS token...
Initializing global attention on CLS token...
Initializing global attention on CLS token...
Initializing global attention on CLS token...
Initializing global attention on CLS token...
Initializing global attention on CLS token...
Initializing global attention on CLS token...
Initializing global attention on CLS token...
Initializing global attention on CLS token...
Initializing global attention on CLS token...
Initializing global attention on CLS token...
Initializing global attention on CLS token...
Initializing global attention on CLS token...
Initializing global attention on CLS token...
Initializing global attention on CLS token...
Initializing global attention on CLS token...
Initializing global attention on CLS token...
Initializing global attention on CLS token...
Initializing global attention on CLS token...
Initializing global attention on C

Epoch,Training Loss,Validation Loss,F1-micro,F1-macro
1,No log,1.303014,0.642857,0.255809
2,No log,1.568679,0.578571,0.291293
3,No log,1.659657,0.614286,0.294559
4,1.076000,1.599705,0.6,0.321477
5,1.076000,1.898735,0.614286,0.330249
6,1.076000,1.854603,0.635714,0.375057


Initializing global attention on CLS token...
Initializing global attention on CLS token...
Initializing global attention on CLS token...
Initializing global attention on CLS token...
Initializing global attention on CLS token...
Initializing global attention on CLS token...
Initializing global attention on CLS token...
Initializing global attention on CLS token...
Initializing global attention on CLS token...
Initializing global attention on CLS token...
Initializing global attention on CLS token...
Initializing global attention on CLS token...
Initializing global attention on CLS token...
Initializing global attention on CLS token...
Initializing global attention on CLS token...
Initializing global attention on CLS token...
Initializing global attention on CLS token...
Initializing global attention on CLS token...
Initializing global attention on CLS token...
Initializing global attention on CLS token...
Initializing global attention on CLS token...
Initializing global attention on C

Epoch,Training Loss,Validation Loss,F1-micro,F1-macro
1,No log,1.589788,0.542857,0.178207


Initializing global attention on CLS token...
Initializing global attention on CLS token...
Initializing global attention on CLS token...
Initializing global attention on CLS token...
Initializing global attention on CLS token...
Initializing global attention on CLS token...
Initializing global attention on CLS token...
Initializing global attention on CLS token...
Initializing global attention on CLS token...
Initializing global attention on CLS token...
Initializing global attention on CLS token...
Initializing global attention on CLS token...
Initializing global attention on CLS token...
Initializing global attention on CLS token...
Initializing global attention on CLS token...
Initializing global attention on CLS token...
Initializing global attention on CLS token...
Initializing global attention on CLS token...
Initializing global attention on CLS token...
Initializing global attention on CLS token...
Initializing global attention on CLS token...
Initializing global attention on C

In [37]:
best_run

BestRun(run_id='0', objective=1.0617515822621284, hyperparameters={'learning_rate': 3.6107212281458205e-05, 'weight_decay': 0.026672900647824342, 'per_device_train_batch_size': 6, 'num_train_epochs': 7, 'lr_scheduler_type': 'cosine'})

In [44]:
best_run.importance()

AttributeError: ignored