<a href="https://colab.research.google.com/github/agemagician/Prot-Transformers/blob/master/Benchmark/ProtElectra.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

<b>Benchmark ProtElectra Model using GPU or CPU</b>

<b>1. Load necessry libraries including huggingface transformers</b>

In [1]:
!pip install -q transformers
!pip install -q gdown

[K     |████████████████████████████████| 675kB 4.7MB/s 
[K     |████████████████████████████████| 1.1MB 20.0MB/s 
[K     |████████████████████████████████| 890kB 30.7MB/s 
[K     |████████████████████████████████| 3.8MB 44.1MB/s 
[?25h  Building wheel for sacremoses (setup.py) ... [?25l[?25hdone


In [2]:
import torch
from transformers import ElectraForMaskedLM, ElectraModel, pipeline
import time
from datetime import timedelta
import os
import gdown

<B>2. Set the url location of ProtElectr and the vocabulary file</b>

In [3]:
generatorModelUrl = 'https://drive.google.com/uc?export=download&confirm=BTQ_&id=1vaB80ioD8MNFB3zE_5AD-QJtNy0389jg'
discriminatorModelUrl = 'https://drive.google.com/uc?export=download&confirm=BTQ_&id=1xMUwFYs4tgD7qIs7XrrqQ6tKabH7ZyS9'

generatorConfigUrl = 'https://drive.google.com/uc?export=download&confirm=BTQ_&id=1SBtS-9_Wy26vZDjXBEos9KuiQc7TChhT'
discriminatorConfigUrl = 'https://drive.google.com/uc?export=download&confirm=BTQ_&id=1jZQLHL4TTMK5eoWL-JhihiVRVoUepC_B'

vocabUrl = 'https://drive.google.com/uc?export=download&confirm=BTQ_&id=1vuAP1zRvN1c6EHoSQMVC2ivZMTpzYR0P'

<b>3. Download ProtElectra models and vocabulary files</b>

In [4]:
downloadFolderPath = 'models/electra/'

In [5]:
discriminatorFolderPath = os.path.join(downloadFolderPath, 'discriminator')
generatorFolderPath = os.path.join(downloadFolderPath, 'generator')

discriminatorModelFilePath = os.path.join(discriminatorFolderPath, 'pytorch_model.bin')
generatorModelFilePath = os.path.join(generatorFolderPath, 'pytorch_model.bin')

discriminatorConfigFilePath = os.path.join(discriminatorFolderPath, 'config.json')
generatorConfigFilePath = os.path.join(generatorFolderPath, 'config.json')

vocabFilePath = os.path.join(downloadFolderPath, 'vocab.txt')

In [6]:
if not os.path.exists(discriminatorFolderPath):
    os.makedirs(discriminatorFolderPath)
if not os.path.exists(generatorFolderPath):
    os.makedirs(generatorFolderPath)

In [7]:
def download_file(url,filename):
  while not os.path.exists(filename):
    gdown.download(url,filename, quiet=False)

In [8]:
if not os.path.exists(generatorModelFilePath):
    download_file(generatorModelUrl, generatorModelFilePath)

if not os.path.exists(discriminatorModelFilePath):
    download_file(discriminatorModelUrl, discriminatorModelFilePath)
    
if not os.path.exists(generatorConfigFilePath):
    download_file(generatorConfigUrl, generatorConfigFilePath)

if not os.path.exists(discriminatorConfigFilePath):
    download_file(discriminatorConfigUrl, discriminatorConfigFilePath)
    
if not os.path.exists(vocabFilePath):
    download_file(vocabUrl, vocabFilePath)

Permission denied: https://drive.google.com/uc?export=download&confirm=BTQ_&id=1vaB80ioD8MNFB3zE_5AD-QJtNy0389jg
Maybe you need to change permission over 'Anyone with the link'?
Downloading...
From: https://drive.google.com/uc?export=download&confirm=BTQ_&id=1vaB80ioD8MNFB3zE_5AD-QJtNy0389jg
To: /content/models/electra/generator/pytorch_model.bin
261MB [00:03, 83.5MB/s]
Downloading...
From: https://drive.google.com/uc?export=download&confirm=BTQ_&id=1xMUwFYs4tgD7qIs7XrrqQ6tKabH7ZyS9
To: /content/models/electra/discriminator/pytorch_model.bin
1.68GB [00:19, 85.7MB/s]
Permission denied: https://drive.google.com/uc?export=download&confirm=BTQ_&id=1SBtS-9_Wy26vZDjXBEos9KuiQc7TChhT
Maybe you need to change permission over 'Anyone with the link'?
Downloading...
From: https://drive.google.com/uc?export=download&confirm=BTQ_&id=1SBtS-9_Wy26vZDjXBEos9KuiQc7TChhT
To: /content/models/electra/generator/config.json
100%|██████████| 463/463 [00:00<00:00, 541kB/s]
Downloading...
From: https://drive.g


<b>4. Load Electra and generator Models</b>

In [9]:
electra = ElectraModel.from_pretrained(discriminatorFolderPath)

In [10]:
generator = ElectraForMaskedLM.from_pretrained(generatorFolderPath)

<b>5. Load the models into the GPU if avilabile</b>

In [11]:
device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')

In [12]:
generator = generator.to(device)
generator = generator.eval()

In [13]:
electra = electra.to(device)
electra = electra.eval()

<b>6. Benchmark Configuration</b>

In [14]:
min_batch_size = 8
max_batch_size = 32
inc_batch_size = 8

min_sequence_length = 64
max_sequence_length = 512
inc_sequence_length = 64

iterations = 10

<b>7. Start Benchmarking Electra</b>

In [15]:
device_name = torch.cuda.get_device_name(device.index) if device.type == 'cuda' else 'CPU'

with torch.no_grad():
    print((' Benchmarking using ' + device_name + ' ').center(80, '*'))
    print(' Start '.center(80, '*'))
    for sequence_length in range(min_sequence_length,max_sequence_length+1,inc_sequence_length):
        for batch_size in range(min_batch_size,max_batch_size+1,inc_batch_size):
            start = time.time()
            for i in range(iterations):
                input_ids = torch.randint(1, 20, (batch_size,sequence_length)).cuda()
                results = electra(input_ids)[0].cpu().numpy()
            end = time.time()
            ms_per_protein = (end-start)/(iterations*batch_size)
            print('Sequence Length: %4d \t Batch Size: %4d \t Ms per protein %4.2f' %(sequence_length,batch_size,ms_per_protein))
        print(' Done '.center(80, '*'))
    print(' Finished '.center(80, '*'))

******************* Benchmarking using Tesla P100-PCIE-16GB ********************
************************************ Start *************************************
Sequence Length:   64 	 Batch Size:    8 	 Ms per protein 0.01
Sequence Length:   64 	 Batch Size:   16 	 Ms per protein 0.01
Sequence Length:   64 	 Batch Size:   24 	 Ms per protein 0.01
Sequence Length:   64 	 Batch Size:   32 	 Ms per protein 0.01
************************************* Done *************************************
Sequence Length:  128 	 Batch Size:    8 	 Ms per protein 0.02
Sequence Length:  128 	 Batch Size:   16 	 Ms per protein 0.02
Sequence Length:  128 	 Batch Size:   24 	 Ms per protein 0.02
Sequence Length:  128 	 Batch Size:   32 	 Ms per protein 0.02
************************************* Done *************************************
Sequence Length:  192 	 Batch Size:    8 	 Ms per protein 0.02
Sequence Length:  192 	 Batch Size:   16 	 Ms per protein 0.02
Sequence Length:  192 	 Batch Size:   24 	 Ms 

<b>7. Start Benchmarking Generator</b>

In [16]:
device_name = torch.cuda.get_device_name(device.index) if device.type == 'cuda' else 'CPU'

with torch.no_grad():
    print((' Benchmarking using ' + device_name + ' ').center(80, '*'))
    print(' Start '.center(80, '*'))
    for sequence_length in range(min_sequence_length,max_sequence_length+1,inc_sequence_length):
        for batch_size in range(min_batch_size,max_batch_size+1,inc_batch_size):
            start = time.time()
            for i in range(iterations):
                input_ids = torch.randint(1, 20, (batch_size,sequence_length)).cuda()
                results = generator(input_ids)[0].cpu().numpy()
            end = time.time()
            ms_per_protein = (end-start)/(iterations*batch_size)
            print('Sequence Length: %4d \t Batch Size: %4d \t Ms per protein %4.2f' %(sequence_length,batch_size,ms_per_protein))
        print(' Done '.center(80, '*'))
    print(' Finished '.center(80, '*'))

******************* Benchmarking using Tesla P100-PCIE-16GB ********************
************************************ Start *************************************
Sequence Length:   64 	 Batch Size:    8 	 Ms per protein 0.00
Sequence Length:   64 	 Batch Size:   16 	 Ms per protein 0.00
Sequence Length:   64 	 Batch Size:   24 	 Ms per protein 0.00
Sequence Length:   64 	 Batch Size:   32 	 Ms per protein 0.00
************************************* Done *************************************
Sequence Length:  128 	 Batch Size:    8 	 Ms per protein 0.00
Sequence Length:  128 	 Batch Size:   16 	 Ms per protein 0.00
Sequence Length:  128 	 Batch Size:   24 	 Ms per protein 0.00
Sequence Length:  128 	 Batch Size:   32 	 Ms per protein 0.00
************************************* Done *************************************
Sequence Length:  192 	 Batch Size:    8 	 Ms per protein 0.00
Sequence Length:  192 	 Batch Size:   16 	 Ms per protein 0.00
Sequence Length:  192 	 Batch Size:   24 	 Ms 