In [None]:
# Install the third-party packages imported by the cells below.
# NOTE(review): no versions are pinned, so a fresh run may resolve different releases.
!pip install datasets transformers torch wandb



In [None]:
import os
import pickle
from dataclasses import dataclass

import numpy as np
from datasets import load_dataset, load_metric
from torch.utils.data import DataLoader
from transformers import AutoTokenizer, AutoModelForSequenceClassification, TrainingArguments, Trainer, set_seed


import wandb
# Authenticate with Weights & Biases, then export the project name as an
# environment variable (presumably read by the W&B reporting integration — confirm).
wandb.login()
%env WANDB_PROJECT=subset_active_learning

[34m[1mwandb[0m: Currently logged in as: [33mgarylai[0m. Use [1m`wandb login --relogin`[0m to force relogin


env: WANDB_PROJECT=subset_active_learning


In [None]:
class ActiveLearner():
    """Active-learning loop that fine-tunes a binary sentiment classifier on SST.

    Each call to :meth:`step` draws new training examples from the not-yet-selected
    part of the SST train split, adds them to the running training set, fine-tunes a
    freshly loaded model on that set, and pickles the test-set predictions.

    Args:
        config: settings object exposing ``model_name``, ``max_length``, ``debug``,
            ``strategy``, ``sampling_sizes`` and ``max_steps``.
        output_dir: optional root directory for checkpoints and prediction pickles.
            When omitted, ``config.output_dir`` is used if the config has one,
            otherwise :attr:`DEFAULT_OUTPUT_DIR`.
    """

    # Root directory used when neither the constructor nor the config names one.
    DEFAULT_OUTPUT_DIR = "active_learning_outputs"
    # Seed for the global RNGs and for the private sampling generator.
    SEED = 42
    # Step budget that replaces ``config.max_steps`` when ``config.debug`` is set.
    DEBUG_MAX_STEPS = 640
    # Number of training examples kept for the debug split.
    DEBUG_N_EXAMPLES = 8
    # Number of evaluations spread over one training run.
    EVALS_PER_RUN = 20

    def __init__(self, config, output_dir=None):
        set_seed(self.SEED)
        self.config = config
        self.output_dir = output_dir or getattr(config, "output_dir", None) or self.DEFAULT_OUTPUT_DIR

        ############ helper functions ############
        # These must exist before any call to preprocess(), which reads both
        # self.tokenizer and self.config.
        self.tokenizer = AutoTokenizer.from_pretrained(config.model_name)
        self.metric = load_metric("accuracy")

        ############ sampling state ############
        # Private generator for sampling. NOTE(review): Trainer is documented to
        # re-seed the global RNGs from TrainingArguments.seed when constructed, which
        # would make global-RNG draws repeat across steps — confirm for the installed
        # transformers version.
        self.rng = np.random.default_rng(self.SEED)
        # Indices (into the train split) selected so far, in selection order.
        self.train_data_indices = []

        ############ set up data ############
        self.sst2 = load_dataset("sst")
        self.valid_ds = self.preprocess(self.sst2["validation"])
        self.test_ds = self.preprocess(self.sst2["test"])

    def preprocess(self, data):
        """Binarise labels and tokenize ``data`` into a torch-formatted dataset.

        The original ``label`` column is renamed to ``scalar_label`` and a new
        ``label`` column is derived from it (0 when ``scalar_label < 0.5``, else 1).
        The ``sentence`` column is tokenized with padding/truncation to
        ``config.max_length``; all pre-existing columns are dropped and the binary
        label is stored under ``labels``.

        Args:
            data: a dataset split with ``sentence`` and ``label`` columns.

        Returns:
            The tokenized dataset with its format set to ``"torch"``.
        """
        data = data.rename_column('label', 'scalar_label')
        data = data.map(lambda x: {'label' : 0 if x['scalar_label'] < 0.5 else 1})

        def tokenize_func(examples):
            tokenized = self.tokenizer(
                examples["sentence"], padding="max_length", max_length=self.config.max_length, truncation=True
            )
            tokenized["labels"] = examples["label"]
            return tokenized

        ds = data.map(
            tokenize_func,
            remove_columns=data.column_names,
            batched=True,
        )
        ds.set_format(type="torch")
        return ds

    def compute_metrics(self, eval_pred):
        """Return the accuracy metric for a ``(logits, labels)`` pair.

        Predictions are the arg-max over the last axis of ``logits``.
        """
        logits, labels = eval_pred
        predictions = np.argmax(logits, axis=-1)
        return self.metric.compute(predictions=predictions, references=labels)

    def sample_data(self, strategy, n_samples):
        """Pick train-split indices that have not been selected yet.

        Args:
            strategy: sampling strategy name; only ``"random_sampling"`` is supported.
            n_samples: number of new indices requested.

        Returns:
            A 1-D numpy array of distinct indices, disjoint from
            ``self.train_data_indices``. Fewer than ``n_samples`` indices are
            returned when the remaining pool is smaller than the request.

        Raises:
            ValueError: if ``strategy`` is not supported.
        """
        if strategy != "random_sampling":
            raise ValueError(f"Unsupported sampling strategy: {strategy!r}")
        print(f"config strategy is {strategy}")

        n_train = len(self.sst2["train"])
        already_selected = np.asarray(self.train_data_indices, dtype=int)
        # Only examples that are not in the training set yet are eligible, so a
        # step never adds a duplicate of an example it already trains on.
        pool = np.setdiff1d(np.arange(n_train), already_selected)

        n_draw = min(int(n_samples), len(pool))
        if n_draw < n_samples:
            print(f"requested {n_samples} samples but only {n_draw} unselected examples remain")
        return self.rng.choice(pool, size=n_draw, replace=False)

    def train(self):
        """Run one active-learning step per entry of ``config.sampling_sizes``."""
        for n_samples in self.config.sampling_sizes:
            self.step(n_samples)

    def step(self, n_samples):
        """Take an active learning step.

        Samples ``n_samples`` new examples, fine-tunes a freshly loaded model on all
        examples selected so far, then pickles the test-set predictions to
        ``<output_dir>/<strategy>/size_<n_samples>/test_set_evaluation.pkl``.
        """
        ########### set up data #########
        # sample new data
        new_indices = self.sample_data(self.config.strategy, n_samples)
        # concatenate the sampled data with the original data (as plain Python ints)
        self.train_data_indices.extend(np.asarray(new_indices).tolist())
        train_data = self.sst2["train"].select(self.train_data_indices)
        debug_data = self.sst2["train"].select(self.train_data_indices[:self.DEBUG_N_EXAMPLES])

        self.train_ds = self.preprocess(train_data)
        self.debug_ds = self.preprocess(debug_data)
        # self.valid_ds was built once in __init__ and does not change between steps.

        ########### set up training #########
        # Explicit per-step directory; created up front so the final pickle write
        # does not depend on a checkpoint having been saved first.
        run_dir = os.path.join(self.output_dir, self.config.strategy, f"size_{n_samples}")
        os.makedirs(run_dir, exist_ok=True)

        max_steps = self.DEBUG_MAX_STEPS if self.config.debug else self.config.max_steps
        # Derived from the effective step budget; never 0, even for tiny budgets.
        eval_steps = max(1, max_steps // self.EVALS_PER_RUN)

        training_args = TrainingArguments(output_dir=run_dir,
                                        max_steps=max_steps,
                                        evaluation_strategy="steps",
                                        eval_steps=eval_steps,
                                        report_to="wandb",
                                        run_name=f"{self.config.strategy}-size-{n_samples}")
        print(f"training_args: {training_args}")
        model = AutoModelForSequenceClassification.from_pretrained(self.config.model_name, num_labels=2)
        trainer = Trainer(
            model=model,
            args=training_args,
            train_dataset=self.train_ds if not self.config.debug else self.debug_ds,
            eval_dataset=self.valid_ds if not self.config.debug else self.debug_ds,
            compute_metrics=self.compute_metrics,
        )
        ######### train #######
        trainer.train()
        wandb.finish()

        ######## test ########
        outputs = trainer.predict(self.test_ds)
        with open(os.path.join(run_dir, "test_set_evaluation.pkl"), "wb") as f:
            pickle.dump(outputs, f)

In [None]:
@dataclass(frozen=True)
class Config:
    """Immutable experiment settings consumed by ``ActiveLearner``.

    Instances are frozen, so a different setting means building a new ``Config``.
    """

    # Length every tokenized sentence is padded or truncated to.
    max_length: int = 66
    # When True, training uses a short fixed step budget and a tiny debug split.
    debug: bool = False
    # Checkpoint name used to load both the tokenizer and the classifier.
    model_name: str = "google/electra-small-discriminator"
    # Name of the sampling strategy passed to ``ActiveLearner.sample_data``.
    strategy: str = "random_sampling"
    # One active-learning step is run per entry, in order.
    sampling_sizes: tuple = (1_000, 2_000, 3_000, 4_000)
    # Optimizer-step budget for a non-debug training run.
    max_steps: int = 10_000


# Notebook-level settings instance, created with debug mode switched on.
config = Config(debug=True)

In [None]:
# Build the learner from the notebook-level `config`, then run the full loop:
# `train()` performs one active-learning step per entry of `config.sampling_sizes`.
active_learner = ActiveLearner(config)
active_learner.train()

In [None]:
# Sweep over three training-set sizes, one Drive folder per size.
# NOTE(review): this cell looks stale relative to the definitions in this notebook:
# `Config` declares `sampling_sizes` (there is no `sampling_size` field) and no
# `train(config, dir)` function is visible here. Confirm and port it to
# `ActiveLearner` before re-running.
for sampling_size in (3000, 5000, 7000):
  config = Config(max_length=66, debug=False, strategy="random_sampling", sampling_size=sampling_size, max_steps=10000)
  # NOTE(review): `dir` shadows the Python builtin of the same name.
  dir = f"/content/drive/MyDrive/active_learning/{config.strategy}/size_{config.sampling_size}"
  # Plain `mkdir` reports an error when the folder already exists (see the recorded output).
  !mkdir $dir
  train(config, dir)

mkdir: cannot create directory ‘/content/drive/MyDrive/active_learning/random_sampling/size_3000’: File exists


No config specified, defaulting to: sst/default
Reusing dataset sst (/root/.cache/huggingface/datasets/sst/default/1.0.0/b8a7889ef01c5d3ae8c379b84cc4080f8aad3ac2bc538701cbe0ac6416fb76ff)


  0%|          | 0/3 [00:00<?, ?it/s]

config strategy is random_sampling


  0%|          | 0/3000 [00:00<?, ?ex/s]

  0%|          | 0/3 [00:00<?, ?ba/s]

Loading cached processed dataset at /root/.cache/huggingface/datasets/sst/default/1.0.0/b8a7889ef01c5d3ae8c379b84cc4080f8aad3ac2bc538701cbe0ac6416fb76ff/cache-ce39997f8c4d62b8.arrow
Loading cached processed dataset at /root/.cache/huggingface/datasets/sst/default/1.0.0/b8a7889ef01c5d3ae8c379b84cc4080f8aad3ac2bc538701cbe0ac6416fb76ff/cache-31ad22bf544a6b9a.arrow
Loading cached processed dataset at /root/.cache/huggingface/datasets/sst/default/1.0.0/b8a7889ef01c5d3ae8c379b84cc4080f8aad3ac2bc538701cbe0ac6416fb76ff/cache-c490c4f1acb5329d.arrow
Loading cached processed dataset at /root/.cache/huggingface/datasets/sst/default/1.0.0/b8a7889ef01c5d3ae8c379b84cc4080f8aad3ac2bc538701cbe0ac6416fb76ff/cache-2067a81432ae9c07.arrow
Loading cached processed dataset at /root/.cache/huggingface/datasets/sst/default/1.0.0/b8a7889ef01c5d3ae8c379b84cc4080f8aad3ac2bc538701cbe0ac6416fb76ff/cache-7dc9b7dd397a1836.arrow
Loading cached processed dataset at /root/.cache/huggingface/datasets/sst/default/1.0.0/b8

training set lengths:  [3000, 1101, 2210, 8]
training_args: TrainingArguments(
_n_gpu=1,
adafactor=False,
adam_beta1=0.9,
adam_beta2=0.999,
adam_epsilon=1e-08,
auto_find_batch_size=False,
bf16=False,
bf16_full_eval=False,
data_seed=None,
dataloader_drop_last=False,
dataloader_num_workers=0,
dataloader_pin_memory=True,
ddp_bucket_cap_mb=None,
ddp_find_unused_parameters=None,
debug=[],
deepspeed=None,
disable_tqdm=False,
do_eval=True,
do_predict=False,
do_train=False,
eval_accumulation_steps=None,
eval_delay=0,
eval_steps=500,
evaluation_strategy=IntervalStrategy.STEPS,
fp16=False,
fp16_backend=auto,
fp16_full_eval=False,
fp16_opt_level=O1,
fsdp=[],
fsdp_min_num_params=0,
full_determinism=False,
gradient_accumulation_steps=1,
gradient_checkpointing=False,
greater_is_better=None,
group_by_length=False,
half_precision_backend=auto,
hub_model_id=None,
hub_private_repo=False,
hub_strategy=HubStrategy.EVERY_SAVE,
hub_token=<HUB_TOKEN>,
ignore_data_skip=False,
include_inputs_for_metrics=False,

Some weights of the model checkpoint at google/electra-small-discriminator were not used when initializing ElectraForSequenceClassification: ['discriminator_predictions.dense.weight', 'discriminator_predictions.dense_prediction.weight', 'discriminator_predictions.dense_prediction.bias', 'discriminator_predictions.dense.bias']
- This IS expected if you are initializing ElectraForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing ElectraForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of ElectraForSequenceClassification were not initialized from the model checkpoint at google/electra-small-discriminator and are newly initialized: ['classifier

Step,Training Loss,Validation Loss,Accuracy
500,0.5639,0.546771,0.777475
1000,0.4119,0.915902,0.728429
1500,0.3269,0.741888,0.810173
2000,0.2112,0.985267,0.80109
2500,0.1475,1.069582,0.792916
3000,0.0933,1.218565,0.799273
3500,0.0558,1.260575,0.809264
4000,0.0473,1.400956,0.807448
4500,0.0524,1.491509,0.787466
5000,0.0349,1.43161,0.805631


***** Running Evaluation *****
  Num examples = 1101
  Batch size = 8
Saving model checkpoint to /content/drive/MyDrive/active_learning/random_sampling/size_3000/checkpoint-500
Configuration saved in /content/drive/MyDrive/active_learning/random_sampling/size_3000/checkpoint-500/config.json
Model weights saved in /content/drive/MyDrive/active_learning/random_sampling/size_3000/checkpoint-500/pytorch_model.bin
***** Running Evaluation *****
  Num examples = 1101
  Batch size = 8
Saving model checkpoint to /content/drive/MyDrive/active_learning/random_sampling/size_3000/checkpoint-1000
Configuration saved in /content/drive/MyDrive/active_learning/random_sampling/size_3000/checkpoint-1000/config.json
Model weights saved in /content/drive/MyDrive/active_learning/random_sampling/size_3000/checkpoint-1000/pytorch_model.bin
***** Running Evaluation *****
  Num examples = 1101
  Batch size = 8
Saving model checkpoint to /content/drive/MyDrive/active_learning/random_sampling/size_3000/checkpoin

VBox(children=(Label(value='13.564 MB of 13.564 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, m…

0,1
eval/accuracy,▅▁█▇▆▇██▆█▇▆▆█▇███▇█
eval/loss,▁▃▂▄▄▅▅▆▇▆▇▇████████
eval/runtime,▅▃▆▄▁▇▆▅█▄▅▅▆▆▃▄▅▄▇▆
eval/samples_per_second,▄▆▃▅█▂▃▄▁▅▄▄▃▃▆▅▄▅▂▃
eval/steps_per_second,▄▆▃▅█▂▃▄▁▅▄▄▃▃▆▅▄▅▂▃
train/epoch,▁▁▁▁▂▂▂▂▂▂▃▃▃▃▄▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▇▇▇▇▇▇████
train/global_step,▁▁▁▁▂▂▂▂▂▂▃▃▃▃▄▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▇▇▇▇▇▇████
train/learning_rate,██▇▇▇▆▆▅▅▅▄▄▄▃▃▂▂▂▁▁
train/loss,█▆▅▄▃▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁
train/total_flos,▁

0,1
eval/accuracy,0.80563
eval/loss,1.6765
eval/runtime,2.5418
eval/samples_per_second,433.161
eval/steps_per_second,54.293
train/epoch,26.67
train/global_step,10000.0
train/learning_rate,0.0
train/loss,0.0049
train/total_flos,303390046080000.0


***** Running Prediction *****
  Num examples = 2210
  Batch size = 8


mkdir: cannot create directory ‘/content/drive/MyDrive/active_learning/random_sampling/size_5000’: File exists


loading configuration file https://huggingface.co/google/electra-small-discriminator/resolve/main/config.json from cache at /root/.cache/huggingface/transformers/ca13c16218c6780ec76753d3afa19fcb7cc759e3f63ee87e441562d374762b3d.3dd1921e571dfa18c0bdaa17b9b38f111097812281989b1cb22263738e66ef73
Model config ElectraConfig {
  "_name_or_path": "google/electra-small-discriminator",
  "architectures": [
    "ElectraForPreTraining"
  ],
  "attention_probs_dropout_prob": 0.1,
  "classifier_dropout": null,
  "embedding_size": 128,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 256,
  "initializer_range": 0.02,
  "intermediate_size": 1024,
  "layer_norm_eps": 1e-12,
  "max_position_embeddings": 512,
  "model_type": "electra",
  "num_attention_heads": 4,
  "num_hidden_layers": 12,
  "pad_token_id": 0,
  "position_embedding_type": "absolute",
  "summary_activation": "gelu",
  "summary_last_dropout": 0.1,
  "summary_type": "first",
  "summary_use_proj": true,
  "transformers_v

  0%|          | 0/3 [00:00<?, ?it/s]

config strategy is random_sampling


  0%|          | 0/5000 [00:00<?, ?ex/s]

  0%|          | 0/5 [00:00<?, ?ba/s]

Loading cached processed dataset at /root/.cache/huggingface/datasets/sst/default/1.0.0/b8a7889ef01c5d3ae8c379b84cc4080f8aad3ac2bc538701cbe0ac6416fb76ff/cache-ce39997f8c4d62b8.arrow


  0%|          | 0/2 [00:00<?, ?ba/s]

Loading cached processed dataset at /root/.cache/huggingface/datasets/sst/default/1.0.0/b8a7889ef01c5d3ae8c379b84cc4080f8aad3ac2bc538701cbe0ac6416fb76ff/cache-c490c4f1acb5329d.arrow


  0%|          | 0/3 [00:00<?, ?ba/s]

Loading cached processed dataset at /root/.cache/huggingface/datasets/sst/default/1.0.0/b8a7889ef01c5d3ae8c379b84cc4080f8aad3ac2bc538701cbe0ac6416fb76ff/cache-7dc9b7dd397a1836.arrow


  0%|          | 0/1 [00:00<?, ?ba/s]

PyTorch: setting up devices


training set lengths:  [5000, 1101, 2210, 8]
training_args: TrainingArguments(
_n_gpu=1,
adafactor=False,
adam_beta1=0.9,
adam_beta2=0.999,
adam_epsilon=1e-08,
auto_find_batch_size=False,
bf16=False,
bf16_full_eval=False,
data_seed=None,
dataloader_drop_last=False,
dataloader_num_workers=0,
dataloader_pin_memory=True,
ddp_bucket_cap_mb=None,
ddp_find_unused_parameters=None,
debug=[],
deepspeed=None,
disable_tqdm=False,
do_eval=True,
do_predict=False,
do_train=False,
eval_accumulation_steps=None,
eval_delay=0,
eval_steps=500,
evaluation_strategy=IntervalStrategy.STEPS,
fp16=False,
fp16_backend=auto,
fp16_full_eval=False,
fp16_opt_level=O1,
fsdp=[],
fsdp_min_num_params=0,
full_determinism=False,
gradient_accumulation_steps=1,
gradient_checkpointing=False,
greater_is_better=None,
group_by_length=False,
half_precision_backend=auto,
hub_model_id=None,
hub_private_repo=False,
hub_strategy=HubStrategy.EVERY_SAVE,
hub_token=<HUB_TOKEN>,
ignore_data_skip=False,
include_inputs_for_metrics=False,

loading configuration file https://huggingface.co/google/electra-small-discriminator/resolve/main/config.json from cache at /root/.cache/huggingface/transformers/ca13c16218c6780ec76753d3afa19fcb7cc759e3f63ee87e441562d374762b3d.3dd1921e571dfa18c0bdaa17b9b38f111097812281989b1cb22263738e66ef73
Model config ElectraConfig {
  "_name_or_path": "google/electra-small-discriminator",
  "architectures": [
    "ElectraForPreTraining"
  ],
  "attention_probs_dropout_prob": 0.1,
  "classifier_dropout": null,
  "embedding_size": 128,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 256,
  "initializer_range": 0.02,
  "intermediate_size": 1024,
  "layer_norm_eps": 1e-12,
  "max_position_embeddings": 512,
  "model_type": "electra",
  "num_attention_heads": 4,
  "num_hidden_layers": 12,
  "pad_token_id": 0,
  "position_embedding_type": "absolute",
  "summary_activation": "gelu",
  "summary_last_dropout": 0.1,
  "summary_type": "first",
  "summary_use_proj": true,
  "transformers_v

Step,Training Loss,Validation Loss,Accuracy
500,0.549,0.473582,0.817439
1000,0.4148,0.473139,0.829246
1500,0.3558,0.616494,0.826521
2000,0.3003,0.716283,0.824705
2500,0.2121,0.849306,0.82198
3000,0.1641,0.920792,0.817439
3500,0.1356,0.944576,0.821072
4000,0.1032,1.01268,0.821072
4500,0.0753,1.122677,0.822888
5000,0.0737,1.091924,0.82743


***** Running Evaluation *****
  Num examples = 1101
  Batch size = 8
Saving model checkpoint to /content/drive/MyDrive/active_learning/random_sampling/size_5000/checkpoint-500
Configuration saved in /content/drive/MyDrive/active_learning/random_sampling/size_5000/checkpoint-500/config.json
Model weights saved in /content/drive/MyDrive/active_learning/random_sampling/size_5000/checkpoint-500/pytorch_model.bin
***** Running Evaluation *****
  Num examples = 1101
  Batch size = 8
Saving model checkpoint to /content/drive/MyDrive/active_learning/random_sampling/size_5000/checkpoint-1000
Configuration saved in /content/drive/MyDrive/active_learning/random_sampling/size_5000/checkpoint-1000/config.json
Model weights saved in /content/drive/MyDrive/active_learning/random_sampling/size_5000/checkpoint-1000/pytorch_model.bin
***** Running Evaluation *****
  Num examples = 1101
  Batch size = 8
Saving model checkpoint to /content/drive/MyDrive/active_learning/random_sampling/size_5000/checkpoin

VBox(children=(Label(value='13.662 MB of 13.662 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, m…

0,1
eval/accuracy,▁▅▄▃▂▁▂▂▃▄▄▇▅█▄▅▇▂▅▇
eval/loss,▁▁▂▃▄▄▄▅▅▅▆▅▆▆▇▇▇██▇
eval/runtime,▅▄▇▆█▅▄▂▁▃▄▂▆▂▄▅▆▅█▅
eval/samples_per_second,▄▅▂▃▁▄▅▇█▆▅▇▃▇▅▄▃▄▁▄
eval/steps_per_second,▄▅▂▃▁▄▅▇█▆▅▇▃▇▅▄▃▄▁▄
train/epoch,▁▁▁▁▂▂▂▂▂▂▃▃▃▃▄▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▇▇▇▇▇▇████
train/global_step,▁▁▁▁▂▂▂▂▂▂▃▃▃▃▄▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▇▇▇▇▇▇████
train/learning_rate,██▇▇▇▆▆▅▅▅▄▄▄▃▃▂▂▂▁▁
train/loss,█▆▆▅▄▃▃▂▂▂▂▂▁▁▁▁▁▁▁▁
train/total_flos,▁

0,1
eval/accuracy,0.83742
eval/loss,1.40796
eval/runtime,2.5713
eval/samples_per_second,428.181
eval/steps_per_second,53.669
train/epoch,16.0
train/global_step,10000.0
train/learning_rate,0.0
train/loss,0.0116
train/total_flos,303390046080000.0


***** Running Prediction *****
  Num examples = 2210
  Batch size = 8


mkdir: cannot create directory ‘/content/drive/MyDrive/active_learning/random_sampling/size_7000’: File exists


loading configuration file https://huggingface.co/google/electra-small-discriminator/resolve/main/config.json from cache at /root/.cache/huggingface/transformers/ca13c16218c6780ec76753d3afa19fcb7cc759e3f63ee87e441562d374762b3d.3dd1921e571dfa18c0bdaa17b9b38f111097812281989b1cb22263738e66ef73
Model config ElectraConfig {
  "_name_or_path": "google/electra-small-discriminator",
  "architectures": [
    "ElectraForPreTraining"
  ],
  "attention_probs_dropout_prob": 0.1,
  "classifier_dropout": null,
  "embedding_size": 128,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 256,
  "initializer_range": 0.02,
  "intermediate_size": 1024,
  "layer_norm_eps": 1e-12,
  "max_position_embeddings": 512,
  "model_type": "electra",
  "num_attention_heads": 4,
  "num_hidden_layers": 12,
  "pad_token_id": 0,
  "position_embedding_type": "absolute",
  "summary_activation": "gelu",
  "summary_last_dropout": 0.1,
  "summary_type": "first",
  "summary_use_proj": true,
  "transformers_v

  0%|          | 0/3 [00:00<?, ?it/s]

config strategy is random_sampling


  0%|          | 0/7000 [00:00<?, ?ex/s]

  0%|          | 0/7 [00:00<?, ?ba/s]

Loading cached processed dataset at /root/.cache/huggingface/datasets/sst/default/1.0.0/b8a7889ef01c5d3ae8c379b84cc4080f8aad3ac2bc538701cbe0ac6416fb76ff/cache-ce39997f8c4d62b8.arrow


  0%|          | 0/2 [00:00<?, ?ba/s]

Loading cached processed dataset at /root/.cache/huggingface/datasets/sst/default/1.0.0/b8a7889ef01c5d3ae8c379b84cc4080f8aad3ac2bc538701cbe0ac6416fb76ff/cache-c490c4f1acb5329d.arrow


  0%|          | 0/3 [00:00<?, ?ba/s]

Loading cached processed dataset at /root/.cache/huggingface/datasets/sst/default/1.0.0/b8a7889ef01c5d3ae8c379b84cc4080f8aad3ac2bc538701cbe0ac6416fb76ff/cache-7dc9b7dd397a1836.arrow


  0%|          | 0/1 [00:00<?, ?ba/s]

PyTorch: setting up devices


training set lengths:  [7000, 1101, 2210, 8]
training_args: TrainingArguments(
_n_gpu=1,
adafactor=False,
adam_beta1=0.9,
adam_beta2=0.999,
adam_epsilon=1e-08,
auto_find_batch_size=False,
bf16=False,
bf16_full_eval=False,
data_seed=None,
dataloader_drop_last=False,
dataloader_num_workers=0,
dataloader_pin_memory=True,
ddp_bucket_cap_mb=None,
ddp_find_unused_parameters=None,
debug=[],
deepspeed=None,
disable_tqdm=False,
do_eval=True,
do_predict=False,
do_train=False,
eval_accumulation_steps=None,
eval_delay=0,
eval_steps=500,
evaluation_strategy=IntervalStrategy.STEPS,
fp16=False,
fp16_backend=auto,
fp16_full_eval=False,
fp16_opt_level=O1,
fsdp=[],
fsdp_min_num_params=0,
full_determinism=False,
gradient_accumulation_steps=1,
gradient_checkpointing=False,
greater_is_better=None,
group_by_length=False,
half_precision_backend=auto,
hub_model_id=None,
hub_private_repo=False,
hub_strategy=HubStrategy.EVERY_SAVE,
hub_token=<HUB_TOKEN>,
ignore_data_skip=False,
include_inputs_for_metrics=False,

loading configuration file https://huggingface.co/google/electra-small-discriminator/resolve/main/config.json from cache at /root/.cache/huggingface/transformers/ca13c16218c6780ec76753d3afa19fcb7cc759e3f63ee87e441562d374762b3d.3dd1921e571dfa18c0bdaa17b9b38f111097812281989b1cb22263738e66ef73
Model config ElectraConfig {
  "_name_or_path": "google/electra-small-discriminator",
  "architectures": [
    "ElectraForPreTraining"
  ],
  "attention_probs_dropout_prob": 0.1,
  "classifier_dropout": null,
  "embedding_size": 128,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 256,
  "initializer_range": 0.02,
  "intermediate_size": 1024,
  "layer_norm_eps": 1e-12,
  "max_position_embeddings": 512,
  "model_type": "electra",
  "num_attention_heads": 4,
  "num_hidden_layers": 12,
  "pad_token_id": 0,
  "position_embedding_type": "absolute",
  "summary_activation": "gelu",
  "summary_last_dropout": 0.1,
  "summary_type": "first",
  "summary_use_proj": true,
  "transformers_v

Step,Training Loss,Validation Loss,Accuracy
500,0.5755,0.435988,0.81653
1000,0.4538,0.431505,0.824705
1500,0.3939,0.61534,0.811989
2000,0.3665,0.589761,0.820163
2500,0.3212,0.577386,0.828338
3000,0.2567,0.665141,0.821072
3500,0.2559,0.80992,0.81653
4000,0.1788,0.75347,0.843778
4500,0.1843,0.864364,0.822888
5000,0.1417,0.833706,0.841054


***** Running Evaluation *****
  Num examples = 1101
  Batch size = 8
Saving model checkpoint to /content/drive/MyDrive/active_learning/random_sampling/size_7000/checkpoint-500
Configuration saved in /content/drive/MyDrive/active_learning/random_sampling/size_7000/checkpoint-500/config.json
Model weights saved in /content/drive/MyDrive/active_learning/random_sampling/size_7000/checkpoint-500/pytorch_model.bin
***** Running Evaluation *****
  Num examples = 1101
  Batch size = 8
Saving model checkpoint to /content/drive/MyDrive/active_learning/random_sampling/size_7000/checkpoint-1000
Configuration saved in /content/drive/MyDrive/active_learning/random_sampling/size_7000/checkpoint-1000/config.json
Model weights saved in /content/drive/MyDrive/active_learning/random_sampling/size_7000/checkpoint-1000/pytorch_model.bin
***** Running Evaluation *****
  Num examples = 1101
  Batch size = 8
Saving model checkpoint to /content/drive/MyDrive/active_learning/random_sampling/size_7000/checkpoin

VBox(children=(Label(value='13.751 MB of 13.751 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, m…

0,1
eval/accuracy,▂▄▁▃▅▃▂█▃▇▅▆▃▄▅▆▆▆▇▆
eval/loss,▁▁▃▃▂▃▅▄▅▅▆▆▇▆█▇▇███
eval/runtime,▄▆▆▂▅▃▄▁▆▃▆█▆▆▃▅▅▄▅▃
eval/samples_per_second,▅▃▃▇▄▅▅█▃▆▃▁▃▃▅▄▄▅▄▆
eval/steps_per_second,▅▃▃▇▄▅▅█▃▆▃▁▃▃▅▄▄▅▄▆
train/epoch,▁▁▁▁▂▂▂▂▂▂▃▃▃▃▄▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▇▇▇▇▇▇████
train/global_step,▁▁▁▁▂▂▂▂▂▂▃▃▃▃▄▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▇▇▇▇▇▇████
train/learning_rate,██▇▇▇▆▆▅▅▅▄▄▄▃▃▂▂▂▁▁
train/loss,█▆▆▅▅▄▄▃▃▂▂▂▂▂▁▁▁▁▁▁
train/total_flos,▁

0,1
eval/accuracy,0.8347
eval/loss,1.14796
eval/runtime,2.5731
eval/samples_per_second,427.892
eval/steps_per_second,53.632
train/epoch,11.43
train/global_step,10000.0
train/learning_rate,0.0
train/loss,0.0357
train/total_flos,303390046080000.0


***** Running Prediction *****
  Num examples = 2210
  Batch size = 8


In [None]:
# with open(f"{dir}/test_set_evaluation.pkl", "rb") as f:
#     loaded_outputs = pickle.load(f)