In [1]:
import torch
import torchvision.models as models
from torch.profiler import profile, record_function, ProfilerActivity

  from .autonotebook import tqdm as notebook_tqdm


## ResNet

In [5]:
model = models.resnet18().cuda()
inputs = torch.randn(5, 3, 224, 224).cuda()

In [23]:
with profile(activities=[ProfilerActivity.CPU, ProfilerActivity.CUDA], profile_memory=True, record_shapes=True, with_stack=True) as prof:
    with record_function("model_inferenceeeeeeeee"): 
        for _ in range(5):
            output = model(inputs)

prof.export_chrome_trace("trace.json")

In [19]:
print(prof.key_averages().table(sort_by="cuda_time_total", row_limit=10))

-------------------------------------------------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  
                                                   Name    Self CPU %      Self CPU   CPU total %     CPU total  CPU time avg     Self CUDA   Self CUDA %    CUDA total  CUDA time avg       CPU Mem  Self CPU Mem      CUDA Mem  Self CUDA Mem    # of Calls  
-------------------------------------------------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  
                                model_inferenceeeeeeeee         9.15%       3.292ms        99.79%      35.886ms      35.886ms       0.000us         0.00%       4.268ms       4.268ms          -4 b        -268 b    -176.00 Kb    -109.74 M

In [21]:
prof.export_stacks("./profiler_stacks.txt", "self_cuda_time_total")

In [None]:
# Visualize as flamegrpah
"""
cd Flamegraph
./flamegraph.pl --title "CUDA time" --countname "us." /Users/garylai/Dev/subset-active-learning/local_bucket/profiler_stacks.txt > perf_viz.svg
open .
"""

## Electra Small

In [3]:
from subset_active_learning.subset_selection import select, preprocess
MODEL_CARD = "roberta-large"
train_args = select.SubsetTrainingArguments(max_steps=1, model_card=MODEL_CARD)
processed_ds = preprocess.preprocess_sst2(MODEL_CARD)
subset_trainer = select.SubsetTrainer(params=train_args, valid_ds=processed_ds["validation"].select(range(5)), test_ds=processed_ds["test"])

No config specified, defaulting to: sst/default
Reusing dataset sst (/home/glai/.cache/huggingface/datasets/sst/default/1.0.0/b8a7889ef01c5d3ae8c379b84cc4080f8aad3ac2bc538701cbe0ac6416fb76ff)
100%|██████████| 3/3 [00:00<00:00, 750.90it/s]
Downloading: 100%|██████████| 482/482 [00:00<00:00, 306kB/s]
Downloading: 100%|██████████| 899k/899k [00:00<00:00, 2.92MB/s]
Downloading: 100%|██████████| 456k/456k [00:00<00:00, 1.85MB/s]
Downloading: 100%|██████████| 1.36M/1.36M [00:00<00:00, 3.51MB/s]
100%|██████████| 8544/8544 [00:02<00:00, 3618.59ex/s]
100%|██████████| 1101/1101 [00:00<00:00, 3576.10ex/s]
100%|██████████| 2210/2210 [00:00<00:00, 3749.55ex/s]
100%|██████████| 8544/8544 [00:01<00:00, 6485.67ex/s]
100%|██████████| 1101/1101 [00:00<00:00, 6663.57ex/s]
100%|██████████| 2210/2210 [00:00<00:00, 6668.17ex/s]


In [None]:
with profile(activities=[ProfilerActivity.CPU, ProfilerActivity.CUDA], profile_memory=True, record_shapes=True, with_stack=True) as prof:
    with record_function("electra"): 
        subset_trainer.train(subset=processed_ds["train"].select(range(100)))

In [15]:
import sqlite3
from typing import Any, Optional
from pydantic import BaseModel, Extra, Field
from transformers import TrainingArguments, AutoModel, AutoTokenizer, AutoModelForSequenceClassification, get_scheduler
import numpy as np
import json
import datasets
import wandb
import torch
from tqdm import tqdm
from subset_active_learning.subset_selection import select, preprocess


N_RUNS = 20
DB_PATH = "/home/glai/dev/subset-active-learning/local_bucket/new_sst.db"

training_args = select.SubsetTrainingArguments()
searching_args = select.SubsetSearcherArguments(seed=0, db_path=DB_PATH)

processed_ds = preprocess.preprocess_sst2(training_args.model_card)

subset_trainer = select.SubsetTrainer(
    params=training_args, valid_ds=processed_ds["validation"], test_ds=processed_ds["test"]
)

data_pool = processed_ds["train"].shuffle(seed=searching_args.seed).select(range(searching_args.data_pool_size))
subset_searcher = select.SubsetSearcher(subset_trainer=subset_trainer, params=searching_args, data_pool=data_pool)


No config specified, defaulting to: sst/default
Reusing dataset sst (/home/glai/.cache/huggingface/datasets/sst/default/1.0.0/b8a7889ef01c5d3ae8c379b84cc4080f8aad3ac2bc538701cbe0ac6416fb76ff)
100%|██████████| 3/3 [00:00<00:00, 783.49it/s]
Loading cached processed dataset at /home/glai/.cache/huggingface/datasets/sst/default/1.0.0/b8a7889ef01c5d3ae8c379b84cc4080f8aad3ac2bc538701cbe0ac6416fb76ff/cache-7b23d5d1250b3cef.arrow
Loading cached processed dataset at /home/glai/.cache/huggingface/datasets/sst/default/1.0.0/b8a7889ef01c5d3ae8c379b84cc4080f8aad3ac2bc538701cbe0ac6416fb76ff/cache-6a94e7b0eb5aeeeb.arrow
Loading cached processed dataset at /home/glai/.cache/huggingface/datasets/sst/default/1.0.0/b8a7889ef01c5d3ae8c379b84cc4080f8aad3ac2bc538701cbe0ac6416fb76ff/cache-f1806eefe95f0de4.arrow
Loading cached processed dataset at /home/glai/.cache/huggingface/datasets/sst/default/1.0.0/b8a7889ef01c5d3ae8c379b84cc4080f8aad3ac2bc538701cbe0ac6416fb76ff/cache-e283858a38c31b48.arrow
Loading cache

In [16]:
with profile(activities=[
        ProfilerActivity.CPU, ProfilerActivity.CUDA], record_shapes=True) as prof:
    with record_function("subset_search"):
        subset_searcher.search(n_runs=1)

Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


### RUN 0


[34m[1mwandb[0m: Currently logged in as: [33mgarylai[0m ([33mjohnny-gary[0m). Use [1m`wandb login --relogin`[0m to force relogin


Some weights of the model checkpoint at google/electra-small-discriminator were not used when initializing ElectraForSequenceClassification: ['discriminator_predictions.dense_prediction.bias', 'discriminator_predictions.dense_prediction.weight', 'discriminator_predictions.dense.bias', 'discriminator_predictions.dense.weight']
- This IS expected if you are initializing ElectraForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing ElectraForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of ElectraForSequenceClassification were not initialized from the model checkpoint at google/electra-small-discriminator and are newly initialized: ['classifier

0,1
data_pool_size,▁
loss,██████▇▇█▇▆▅▃▃▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
optimal_subset_size,▁
sst2_test:accuracy,▁
sst:val_acc,▁▇████

0,1
data_pool_size,1000
indices,"[167, 331, 255, 734,..."
loss,0.00352
model_card,google/electra-small...
optimal_subset_size,100
sst2_test:accuracy,0.66787
sst:val_acc,0.64214


In [None]:
print(prof.key_averages().table(sort_by="cuda_time_total", row_limit=10))

-------------------------------------------------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  
                                                   Name    Self CPU %      Self CPU   CPU total %     CPU total  CPU time avg     Self CUDA   Self CUDA %    CUDA total  CUDA time avg    # of Calls  
-------------------------------------------------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  
                                        model_inference         0.14%       1.890ms       100.00%        1.318s        1.318s       0.000us         0.00%       4.334ms       4.334ms             1  
                                           aten::conv2d         0.00%      65.000us        99.52%        1.312s      65.593ms       0.000us         0.00%       3.213ms     160.650us            20  
         