# 04 - Benchmarking

In [None]:
# for logging gpu usage
!pip install py3nvml

In [1]:
import numpy as np
import pandas as pd

from transformers import PyTorchBenchmark, PyTorchBenchmarkArguments

from src import models

# Create the project's model wrappers and load their saved checkpoints.
# The checkpoint paths are relative, so they resolve against the kernel's
# current working directory.
# NOTE(review): the directory names suggest checkpoints fine-tuned for 3 epochs — confirm.
distilbert = models.DistilBERT(pretrained_checkpoint='output/distilbert/distilbert_monitors_3epoch')
t5 = models.T5(pretrained_checkpoint='output/t5/t5_monitors_printers_3epoch')

In [2]:
# Benchmark both models over a grid of batch sizes and sequence lengths.
# The entries in `models` and `configs` are given in the same order
# (distilbert first, then t5).
# NOTE(review): the recorded output reports 'N/A' for every distilbert run at
# sequence length 1024 — presumably that length is beyond what the model
# supports; confirm before interpreting those cells as out-of-memory.
args = PyTorchBenchmarkArguments(
    models=['distilbert-base-uncased', 't5-small'],
    batch_sizes=[8, 16, 32, 64],
    sequence_lengths=[128, 256, 512, 1024],
)

benchmark = PyTorchBenchmark(
    args,
    configs=[distilbert.model.config, t5.model.config],
)
o = benchmark.run()

1 / 2
Doesn't fit on GPU. CUDA error: CUBLAS_STATUS_NOT_INITIALIZED when calling `cublasCreate(handle)`
Doesn't fit on GPU. CUDA error: CUBLAS_STATUS_NOT_INITIALIZED when calling `cublasCreate(handle)`
Doesn't fit on GPU. CUDA error: CUBLAS_STATUS_NOT_INITIALIZED when calling `cublasCreate(handle)`
Doesn't fit on GPU. CUDA error: CUBLAS_STATUS_NOT_INITIALIZED when calling `cublasCreate(handle)`
Doesn't fit on GPU. CUDA error: CUBLAS_STATUS_NOT_INITIALIZED when calling `cublasCreate(handle)`
Doesn't fit on GPU. CUDA error: CUBLAS_STATUS_NOT_INITIALIZED when calling `cublasCreate(handle)`
Doesn't fit on GPU. CUDA error: CUBLAS_STATUS_NOT_INITIALIZED when calling `cublasCreate(handle)`
Doesn't fit on GPU. CUDA error: CUBLAS_STATUS_NOT_INITIALIZED when calling `cublasCreate(handle)`
2 / 2
Doesn't fit on GPU. CUDA out of memory. Tried to allocate 2.00 GiB (GPU 0; 11.17 GiB total capacity; 7.60 GiB already allocated; 1.61 GiB free; 9.23 GiB reserved in total by PyTorch)
Doesn't fit on GPU. C

In [3]:
# Inference timings, nested as model name -> batch size -> sequence length.
# Runs that did not complete are recorded as the string 'N/A'.
# NOTE(review): values are presumably seconds per forward pass — confirm.
o.time_inference_result

{'distilbert-base-uncased': {'bs': [8, 16, 32, 64],
  'ss': [128, 256, 512, 1024],
  'result': {8: {128: 0.047798289600177665,
    256: 0.07222247010031424,
    512: 0.126979520399982,
    1024: 'N/A'},
   16: {128: 0.061490233600125066,
    256: 0.12664895599991724,
    512: 0.2437034770999162,
    1024: 'N/A'},
   32: {128: 0.11908518790005473,
    256: 0.22610821339985704,
    512: 0.47391039329995693,
    1024: 'N/A'},
   64: {128: 0.2153486957999121,
    256: 0.43711628669989294,
    512: 0.9397330982999847,
    1024: 'N/A'}}},
 't5-small': {'bs': [8, 16, 32, 64],
  'ss': [128, 256, 512, 1024],
  'result': {8: {128: 0.09734471640003903,
    256: 0.2067505085000448,
    512: 0.42383799480012385,
    1024: 1.059572129799926},
   16: {128: 0.18667094730008102,
    256: 0.36821561540018594,
    512: 0.8047251864001737,
    1024: 2.0615904717000375},
   32: {128: 0.3322647165001399,
    256: 0.6984983140999248,
    512: 1.5949305439000454,
    1024: 4.109622462900006},
   64: {128: 0.6

In [4]:
# Inference memory usage, nested as model name -> batch size -> sequence length.
# Runs that did not complete are recorded as the string 'N/A'.
# NOTE(review): values are presumably in MB — confirm.
o.memory_inference_result

{'distilbert-base-uncased': {'bs': [8, 16, 32, 64],
  'ss': [128, 256, 512, 1024],
  'result': {8: {128: 672, 256: 716, 512: 892, 1024: 'N/A'},
   16: {128: 728, 256: 796, 512: 1180, 1024: 'N/A'},
   32: {128: 832, 256: 988, 512: 1732, 1024: 'N/A'},
   64: {128: 1060, 256: 1348, 512: 2836, 1024: 'N/A'}}},
 't5-small': {'bs': [8, 16, 32, 64],
  'ss': [128, 256, 512, 1024],
  'result': {8: {128: 802, 256: 1022, 512: 1664, 1024: 3290},
   16: {128: 1006, 256: 1504, 512: 2634, 1024: 6006},
   32: {128: 1464, 256: 2378, 512: 4694, 1024: 11438},
   64: {128: 2298, 256: 4182, 512: 8816, 1024: 'N/A'}}}}

In [7]:
def extract_rows(result, measure_name):
    """Flatten a nested benchmark result into a list of flat row dicts.

    Args:
        result: Mapping of model name to a dict whose ``'result'`` entry maps
            batch size -> {sequence length -> measured value}.
        measure_name: Key under which the measured value is stored in each row.

    Returns:
        One dict per (model, batch size, sequence length) combination with the
        keys ``'model_name'``, ``'batch_size'``, ``'sequence_length'`` and
        ``measure_name``. The measured value is always stored as a string.
    """
    rows = []
    for model_name, per_model in result.items():
        for batch_size, per_batch in per_model['result'].items():
            for seq_len, value in per_batch.items():
                rows.append({
                    'model_name': model_name,
                    'batch_size': batch_size,
                    'sequence_length': seq_len,
                    # Stringify so numeric values and the 'N/A' marker share one type.
                    measure_name: str(value),
                })
    return rows

# Build one frame per measure. extract_rows stores every value as a string, so
# swap the 'N/A' marker for NaN and cast the measure column to float.
_time_df = pd.DataFrame(extract_rows(o.time_inference_result, 'time')).assign(
    time=lambda frame: frame['time'].replace('N/A', np.nan).astype(float)
)
_mem_df = pd.DataFrame(extract_rows(o.memory_inference_result, 'memory')).assign(
    memory=lambda frame: frame['memory'].replace('N/A', np.nan).astype(float)
)

# Join the two measures on the benchmark configuration.
df = _time_df.merge(_mem_df, on=['model_name', 'batch_size', 'sequence_length'])
df

Unnamed: 0,model_name,batch_size,sequence_length,time,memory
0,distilbert-base-uncased,8,128,0.047798,672.0
1,distilbert-base-uncased,8,256,0.072222,716.0
2,distilbert-base-uncased,8,512,0.12698,892.0
3,distilbert-base-uncased,8,1024,,
4,distilbert-base-uncased,16,128,0.06149,728.0
5,distilbert-base-uncased,16,256,0.126649,796.0
6,distilbert-base-uncased,16,512,0.243703,1180.0
7,distilbert-base-uncased,16,1024,,
8,distilbert-base-uncased,32,128,0.119085,832.0
9,distilbert-base-uncased,32,256,0.226108,988.0


In [8]:
# Rank configurations by inference time, lowest first (ascending is the default).
df.sort_values(by='time')

Unnamed: 0,model_name,batch_size,sequence_length,time,memory
0,distilbert-base-uncased,8,128,0.047798,672.0
4,distilbert-base-uncased,16,128,0.06149,728.0
1,distilbert-base-uncased,8,256,0.072222,716.0
16,t5-small,8,128,0.097345,802.0
8,distilbert-base-uncased,32,128,0.119085,832.0
5,distilbert-base-uncased,16,256,0.126649,796.0
2,distilbert-base-uncased,8,512,0.12698,892.0
20,t5-small,16,128,0.186671,1006.0
17,t5-small,8,256,0.206751,1022.0
12,distilbert-base-uncased,64,128,0.215349,1060.0


In [9]:
# Rank configurations by memory usage, lowest first (ascending is the default).
df.sort_values(by='memory')

Unnamed: 0,model_name,batch_size,sequence_length,time,memory
0,distilbert-base-uncased,8,128,0.047798,672.0
1,distilbert-base-uncased,8,256,0.072222,716.0
4,distilbert-base-uncased,16,128,0.06149,728.0
5,distilbert-base-uncased,16,256,0.126649,796.0
16,t5-small,8,128,0.097345,802.0
8,distilbert-base-uncased,32,128,0.119085,832.0
2,distilbert-base-uncased,8,512,0.12698,892.0
9,distilbert-base-uncased,32,256,0.226108,988.0
20,t5-small,16,128,0.186671,1006.0
17,t5-small,8,256,0.206751,1022.0
