In [1]:
%load_ext autoreload  
%autoreload 2 

In [2]:
import ffcv
from ffcv.writer import DatasetWriter
from ffcv.fields import IntField, NDArrayField, FloatField
import datasets
from subset_active_learning.subset_selection import select, preprocess
import wandb
import numpy as np
import torch

  from .autonotebook import tqdm as notebook_tqdm


In [3]:
import psutil

# psutil reports the resident set size (rss) in bytes; scale by 1024 * 1024 before printing.
rss_bytes = psutil.Process().memory_info().rss
print(f"RAM used: {rss_bytes / (1024 * 1024):.2f} MB")

RAM used: 510.54 MB


In [4]:
# Build the preprocessed SST-2 splits for the ELECTRA-small checkpoint.
# Later cells index the result by "train", "validation" and "test" and call
# .shuffle()/.select() on the train split.
# NOTE(review): presumably the checkpoint name selects the tokenizer — confirm in preprocess.preprocess_sst2.
processed_ds = preprocess.preprocess_sst2("google/electra-small-discriminator")

No config specified, defaulting to: sst/default
Found cached dataset sst (/home/glai/.cache/huggingface/datasets/sst/default/1.0.0/b8a7889ef01c5d3ae8c379b84cc4080f8aad3ac2bc538701cbe0ac6416fb76ff)
100%|██████████| 3/3 [00:00<00:00, 698.86it/s]
Loading cached processed dataset at /home/glai/.cache/huggingface/datasets/sst/default/1.0.0/b8a7889ef01c5d3ae8c379b84cc4080f8aad3ac2bc538701cbe0ac6416fb76ff/cache-f8fd6701869c12c8.arrow
Loading cached processed dataset at /home/glai/.cache/huggingface/datasets/sst/default/1.0.0/b8a7889ef01c5d3ae8c379b84cc4080f8aad3ac2bc538701cbe0ac6416fb76ff/cache-92e4d8b08735d65d.arrow
Loading cached processed dataset at /home/glai/.cache/huggingface/datasets/sst/default/1.0.0/b8a7889ef01c5d3ae8c379b84cc4080f8aad3ac2bc538701cbe0ac6416fb76ff/cache-5fc3a07bc35230ee.arrow
Loading cached processed dataset at /home/glai/.cache/huggingface/datasets/sst/default/1.0.0/b8a7889ef01c5d3ae8c379b84cc4080f8aad3ac2bc538701cbe0ac6416fb76ff/cache-631cfbbaad8

In [5]:
# psutil reports the resident set size (rss) in bytes; scale by 1024 * 1024 before printing.
rss_bytes = psutil.Process().memory_info().rss
print(f"RAM used: {rss_bytes / (1024 * 1024):.2f} MB")

RAM used: 529.50 MB


In [6]:
# Convert HF dataset into Torch dataset for ffcv support
class InMemorySST2(torch.utils.data.Dataset):
    """Map-style torch dataset that keeps every row of ``hf_ds`` in a Python list.

    ``hf_ds`` only needs to be iterable; each yielded row is stored as-is and
    returned unchanged by ``__getitem__``.
    """

    def __init__(self, hf_ds):
        # A single pass over the source materializes all rows up front.
        self.in_memory_ds = [row for row in hf_ds]

    def __len__(self):
        """Number of rows copied from the source dataset."""
        return len(self.in_memory_ds)

    def __getitem__(self, i):
        """Return the stored row at position ``i``."""
        return self.in_memory_ds[i]

In [13]:
import time
from typing import List


class BatchSizeComparisonRun:
    """Trains ``select.SubsetTrainer`` on one fixed data split and logs each run to wandb.

    The three datasets are stored unchanged and handed to the trainer as-is.
    The annotations name ``datasets.Dataset``, but the in-memory experiment
    cell also passes ``InMemorySST2`` instances. Annotations are quoted so
    they are not evaluated when the class is defined.

    Args:
        train_ds: Training subset passed to ``SubsetTrainer.train_one_step``.
        valid_ds: Validation split passed to the ``SubsetTrainer`` constructor.
        test_ds: Test split passed to the ``SubsetTrainer`` constructor.
        seed: Seed identifying this split; appended (as a string) to every run's wandb tags.
    """

    def __init__(
        self,
        train_ds: "datasets.Dataset",
        valid_ds: "datasets.Dataset",
        test_ds: "datasets.Dataset",
        seed: int,
    ):
        self.train_ds, self.valid_ds, self.test_ds = train_ds, valid_ds, test_ds
        self.seed = seed

    def one_run(self, wandb_tags: List[str], config: "select.SubsetTrainingArguments"):
        """Run one training job inside its own wandb run and log its wall-clock time.

        Args:
            wandb_tags: Tags for the wandb run. The list is not modified; the
                seed is added to a copy.
            config: Training arguments; ``config.batch_size`` is logged to wandb.
        """
        # Copy instead of appending in place so a tag list reused by the caller
        # does not accumulate one seed per call.
        run_tags = [*wandb_tags, str(self.seed)]
        wandb_run = wandb.init(
            project="subset-search-gpu-opt",
            entity="johnny-gary",
            tags=run_tags,
        )
        wandb.log({"batch_size": config.batch_size})
        subset_trainer = select.SubsetTrainer(
            params=config, valid_ds=self.valid_ds, test_ds=self.test_ds
        )
        # Only the train_one_step call is timed; trainer construction is excluded.
        start_time = time.time()
        subset_trainer.train_one_step(subset=self.train_ds, calculate_test_accuracy=True)
        wandb.log({"run_time": round(time.time() - start_time, 2)})
        wandb_run.finish()

    def run_comparison(
        self,
        small_batch_config: "select.SubsetTrainingArguments",
        large_batch_config: "select.SubsetTrainingArguments",
    ):
        """Run the small-batch configuration, then the large-batch one, on the same data.

        Each run is tagged ``small_batch_<n>`` / ``large_batch_<n>`` with its batch size.
        NOTE(review): the original docstring says each run trains "until early
        stopping"; any stopping rule lives in ``SubsetTrainer`` and is not
        visible here — confirm.
        """
        # The keyword must be `wandb_tags` to match one_run's signature.
        self.one_run(wandb_tags=[f"small_batch_{small_batch_config.batch_size}"], config=small_batch_config)
        self.one_run(wandb_tags=[f"large_batch_{large_batch_config.batch_size}"], config=large_batch_config)

In [8]:
# The large-batch run multiplies the small batch size by INCREASE_FACTOR and
# uses three quarters of the small-batch learning rate.
INCREASE_FACTOR = 4
small_batch_config = select.SubsetTrainingArguments(
    batch_size=8,
    learning_rate=1e-5,
)
large_batch_config = select.SubsetTrainingArguments(
    batch_size=small_batch_config.batch_size * INCREASE_FACTOR,
    learning_rate=small_batch_config.learning_rate * 0.75,
)

In [14]:
############# In Memory Experiments ###############

# Copy the evaluation splits into memory once; every in-memory run below reuses them.
valid_ds, test_ds = (
    InMemorySST2(hf_ds=processed_ds[split_name]) for split_name in ("validation", "test")
)

for seed in range(42, 47):
    # Run A: the seed's 100-example training subset, materialized as an in-memory torch dataset.
    train_ds = InMemorySST2(
        hf_ds=processed_ds["train"].shuffle(seed=seed).select(range(100))
    )
    batch_size_comparison = BatchSizeComparisonRun(
        train_ds=train_ds,
        valid_ds=valid_ds,
        test_ds=test_ds,
        seed=seed,
    )
    batch_size_comparison.one_run(
        wandb_tags=[f"large_batch_{large_batch_config.batch_size}", "in_memory_ds"],
        config=large_batch_config,
    )

    # Run B: same seed and subset size, but the HF dataset objects are passed directly.
    batch_size_comparison = BatchSizeComparisonRun(
        train_ds=processed_ds["train"].shuffle(seed=seed).select(range(100)),
        valid_ds=processed_ds["validation"],
        test_ds=processed_ds["test"],
        seed=seed,
    )
    batch_size_comparison.one_run(
        wandb_tags=[f"large_batch_{large_batch_config.batch_size}", "hf_ds"],
        config=large_batch_config,
    )

Loading cached shuffled indices for dataset at /home/glai/.cache/huggingface/datasets/sst/default/1.0.0/b8a7889ef01c5d3ae8c379b84cc4080f8aad3ac2bc538701cbe0ac6416fb76ff/cache-09ed8c7981d8b355.arrow


  self.metric = datasets.load_metric(self.params.metric)
Downloading builder script: 4.21kB [00:00, 1.57MB/s]                   
Some weights of the model checkpoint at google/electra-small-discriminator were not used when initializing ElectraForSequenceClassification: ['discriminator_predictions.dense_prediction.weight', 'discriminator_predictions.dense_prediction.bias', 'discriminator_predictions.dense.bias', 'discriminator_predictions.dense.weight']
- This IS expected if you are initializing ElectraForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing ElectraForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of ElectraForSequenceClassifica

0,1
batch_size,▁
loss,███████████▇▇▆▆▅▄▃▃▂▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
run_time,▁
sst2_final_test:accuracy,▁
sst2_final_valid:accuracy,▁
sst:val_acc,▁███

0,1
batch_size,32.0
loss,0.0137
run_time,92.54
sst2_final_test:accuracy,0.70407
sst2_final_valid:accuracy,0.68847
sst:val_acc,0.68847


Loading cached shuffled indices for dataset at /home/glai/.cache/huggingface/datasets/sst/default/1.0.0/b8a7889ef01c5d3ae8c379b84cc4080f8aad3ac2bc538701cbe0ac6416fb76ff/cache-09ed8c7981d8b355.arrow


Some weights of the model checkpoint at google/electra-small-discriminator were not used when initializing ElectraForSequenceClassification: ['discriminator_predictions.dense_prediction.weight', 'discriminator_predictions.dense_prediction.bias', 'discriminator_predictions.dense.bias', 'discriminator_predictions.dense.weight']
- This IS expected if you are initializing ElectraForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing ElectraForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of ElectraForSequenceClassification were not initialized from the model checkpoint at google/electra-small-discriminator and are newly initialized: ['classifier

0,1
batch_size,▁
loss,▇█████▇▇▅▅▃▃▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
run_time,▁
sst2_final_test:accuracy,▁
sst2_final_valid:accuracy,▁
sst:val_acc,▁▇████▇

0,1
batch_size,32.0
loss,0.00323
run_time,183.5
sst2_final_test:accuracy,0.68281
sst2_final_valid:accuracy,0.67121
sst:val_acc,0.67121


Loading cached shuffled indices for dataset at /home/glai/.cache/huggingface/datasets/sst/default/1.0.0/b8a7889ef01c5d3ae8c379b84cc4080f8aad3ac2bc538701cbe0ac6416fb76ff/cache-a5e338283d74f5b6.arrow


Some weights of the model checkpoint at google/electra-small-discriminator were not used when initializing ElectraForSequenceClassification: ['discriminator_predictions.dense_prediction.weight', 'discriminator_predictions.dense_prediction.bias', 'discriminator_predictions.dense.bias', 'discriminator_predictions.dense.weight']
- This IS expected if you are initializing ElectraForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing ElectraForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of ElectraForSequenceClassification were not initialized from the model checkpoint at google/electra-small-discriminator and are newly initialized: ['classifier

0,1
batch_size,▁
loss,█████████▇▆▆▅▄▃▃▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
run_time,▁
sst2_final_test:accuracy,▁
sst2_final_valid:accuracy,▁
sst:val_acc,▁████

0,1
batch_size,32.0
loss,0.00858
run_time,117.32
sst2_final_test:accuracy,0.66154
sst2_final_valid:accuracy,0.6594
sst:val_acc,0.6594


Loading cached shuffled indices for dataset at /home/glai/.cache/huggingface/datasets/sst/default/1.0.0/b8a7889ef01c5d3ae8c379b84cc4080f8aad3ac2bc538701cbe0ac6416fb76ff/cache-a5e338283d74f5b6.arrow


Some weights of the model checkpoint at google/electra-small-discriminator were not used when initializing ElectraForSequenceClassification: ['discriminator_predictions.dense_prediction.weight', 'discriminator_predictions.dense_prediction.bias', 'discriminator_predictions.dense.bias', 'discriminator_predictions.dense.weight']
- This IS expected if you are initializing ElectraForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing ElectraForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of ElectraForSequenceClassification were not initialized from the model checkpoint at google/electra-small-discriminator and are newly initialized: ['classifier

In [None]:
# Small-batch vs large-batch comparison on a 100-example training subset, one pair of runs per seed.
for seed in range(42, 47):
    train_ds = processed_ds["train"].shuffle(seed=seed).select(range(100))
    # `seed` is a required constructor argument (it is also added to each run's wandb tags);
    # omitting it raises TypeError before any training starts.
    batch_size_comparison = BatchSizeComparisonRun(
        train_ds=train_ds,
        valid_ds=processed_ds["validation"],
        test_ds=processed_ds["test"],
        seed=seed,
    )
    batch_size_comparison.run_comparison(small_batch_config=small_batch_config, large_batch_config=large_batch_config)