### Validating our inference and benchmarking scripts using OpenAI's pretrained GPT-2 model

In [1]:
import qtransform
import logging
import torch

In [2]:
#qtransform.notebook_run(["run=infer", "run.pretrained_model=gpt2"], logging.INFO)

In [3]:
# Scratch helper: split a space-separated override string into one list entry per override.
# The list displayed here is not identical to the arguments used for the benchmark run
# further down (run.num_samples=1 here, and there is no dataset.subset entry).
'run=bench run.pretrained_model=gpt2 run.num_samples=1 dataset=huggingface dataset.name=wikitext dataset/tokenizer=tiktoken dataset.tokenizer.encoding="gpt2" +model.args.block_size=1024'.split(' ')

['run=bench',
 'run.pretrained_model=gpt2',
 'run.num_samples=1',
 'dataset=huggingface',
 'dataset.name=wikitext',
 'dataset/tokenizer=tiktoken',
 'dataset.tokenizer.encoding="gpt2"',
 '+model.args.block_size=1024']

In [4]:
# Overrides handed to qtransform for the benchmark run, one list entry per override.
args_benchmarking = [
    # which run to execute and which pretrained model to load
    'run=bench',
    'run.pretrained_model=gpt2',
    'run.num_samples=5',
    # dataset selection and its tokenizer
    'dataset=huggingface',
    'dataset.name=wikitext',
    'dataset.subset=wikitext-2-raw-v1',
    'dataset/tokenizer=tiktoken',
    'dataset.tokenizer.encoding="gpt2"',
    # added model argument; the printed config shows dataset.args.block_size
    # being interpolated from this value
    '+model.args.block_size=1024',
]
qtransform.notebook_run(args_benchmarking, logging.INFO)

{'data': {'dtype': 'float32'}, 'device': 'cuda', 'debug': False, 'dataset': {'wrapper': 'HuggingfaceDatasetWrapper', 'module': 'huggingface', 'name': 'wikitext', 'root_path': '~/.qtransform/datasets', 'dataset_dir': ['${dataset.root_path}', '${dataset.module}', '${dataset.name}'], 'sizes': {'train': 0.0, 'eval': 0.0, 'bench': 0.0}, 'tokenizer': {'dtype': '${data.dtype}', 'meta_file': 'meta.pkl', 'wrapper': 'TikTokenizer', 'encoding': 'gpt2', 'module': 'tiktoken'}, 'dataloader': {'shuffle': True, 'num_workers': 2, 'batch_size': 12}, 'subset': 'wikitext-2-raw-v1', 'type': 'huggingface', 'splits': {'names': {'train': 'train', 'eval': 'validation', 'bench': 'test'}, 'sizes': {'train': 0.9, 'eval': 0.05, 'bench': 0.05}}, 'args': {'block_size': '${model.args.block_size}', 'cache_dir': None, 'data_column_name': 'text', 'batches': 1000, 'chunking': True, 'chunk_size': 100}}, 'seed': 1234567890, 'model': {'calc_loss_in_model': False, 'args': {'block_size': 1024}}, 'quantization': {'quantize': F

STAGE:2024-03-07 16:38:30 170824:170824 ActivityProfilerController.cpp:311] Completed Stage: Warm Up


[ [36m2024-03-07 16:38:34,466 [0m][[2;37mqtransform.run.bench[0m][[32mINFO[0m] - [32mBenchmark results: 
┌────────────────────┬───────────┬────────────┐
│ path               │   avg_ppl │   acc_in_% │
├────────────────────┼───────────┼────────────┤
│ hf-pretrained-gpt2 │   189.862 │    23.7988 │
└────────────────────┴───────────┴────────────┘[0m


STAGE:2024-03-07 16:38:34 170824:170824 ActivityProfilerController.cpp:317] Completed Stage: Collection
STAGE:2024-03-07 16:38:34 170824:170824 ActivityProfilerController.cpp:321] Completed Stage: Post Processing


[ [36m2024-03-07 16:38:35,098 [0m][[2;37mqtransform.run.bench[0m][[32mINFO[0m] - [32m
-------------------------------------------------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  
                                                   Name    Self CPU %      Self CPU   CPU total %     CPU total  CPU time avg     Self CUDA   Self CUDA %    CUDA total  CUDA time avg       CPU Mem  Self CPU Mem      CUDA Mem  Self CUDA Mem    # of Calls  
-------------------------------------------------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  
                                             cudaMalloc        43.65%        1.678s        43.65%        1.678s       4.314ms       0.000us    

In [5]:
import torch
import torch.nn.functional as F

def measure_perplexity(logits: torch.Tensor, labels: torch.Tensor) -> torch.Tensor:
    """Return the perplexity, i.e. exp of the cross-entropy between logits and labels.

    Args:
        logits: unnormalized scores. The last dimension is the class dimension;
            all leading dimensions are flattened into a single one.
        labels: class indices, flattened to one dimension so that there is one
            entry per flattened logits row. Entries equal to -1 are ignored.

    Returns:
        A scalar tensor holding exp(cross_entropy(logits, labels)).
    """
    # cross entropy either expects the probabilities of tokens or a list of tokens
    # (https://pytorch.org/docs/stable/generated/torch.nn.CrossEntropyLoss.html);
    # because the labels are flattened to 1-D here, the class-index form is used.
    #
    # reshape() instead of view(): view() raises a RuntimeError for non-contiguous
    # inputs such as shifted slices (logits[..., :-1, :], labels[..., 1:]), while
    # reshape() copies only when it has to and is otherwise identical.
    flat_logits = logits.reshape(-1, logits.size(-1))
    flat_labels = labels.reshape(-1)
    return torch.exp(F.cross_entropy(flat_logits, flat_labels, ignore_index=-1))

In [2]:
# Random tensor of shape (3, 8, 16) used to try out the slicing in the cells below.
# NOTE(review): despite the name, torch.rand yields floats, not integer class indices;
# the shape looks more like a logits tensor - confirm the intended name.
# NOTE(review): no torch seed is set in the cells shown, so the values may differ between runs.
labels = torch.rand(3,8,16)

In [3]:
# Display the full random tensor created in the previous cell.
labels

tensor([[[5.3237e-01, 9.4450e-01, 2.2480e-02, 2.6777e-01, 7.4107e-01,
          9.7566e-01, 4.9895e-01, 8.0610e-01, 9.6529e-01, 1.2113e-01,
          2.1593e-01, 5.1844e-01, 2.8345e-01, 5.5469e-01, 4.0212e-01,
          9.3912e-01],
         [9.8756e-01, 4.3164e-02, 8.7322e-02, 4.3465e-01, 6.0527e-01,
          8.3735e-01, 4.5197e-01, 6.8085e-01, 7.6600e-01, 1.9580e-01,
          1.6626e-01, 3.3511e-01, 2.6638e-01, 9.3222e-01, 5.4504e-01,
          7.4120e-01],
         [4.8294e-01, 2.9383e-01, 9.0077e-01, 6.3442e-01, 1.9574e-01,
          6.0947e-01, 2.0018e-01, 9.5357e-01, 7.2274e-01, 3.2522e-01,
          7.9905e-01, 1.0815e-01, 2.2052e-01, 9.4270e-01, 4.5376e-02,
          2.5447e-01],
         [3.4326e-01, 8.7107e-01, 3.0462e-01, 4.5464e-01, 2.6759e-01,
          6.0171e-01, 2.6769e-01, 7.5461e-01, 4.5220e-01, 8.8607e-01,
          5.1806e-01, 7.8535e-01, 1.3931e-01, 7.0787e-01, 1.7979e-01,
          8.3452e-01],
         [9.1938e-02, 8.6747e-01, 3.6445e-02, 9.2946e-01, 3.9016e-01

In [5]:
# Drop the last entry along the second-to-last dimension and show only the resulting
# shape, (3, 8, 16) -> (3, 7, 16), instead of dumping the whole tensor.
labels[..., :-1, :].shape

torch.Size([3, 7, 16])