In [1]:
from transformers import RobertaTokenizer, RobertaModel, RobertaForMaskedLM, BertForMaskedLM

protein_tokenizer = RobertaTokenizer.from_pretrained("data/target/protein_tokenizer")
vocab_size = len(protein_tokenizer.get_vocab().keys())

print(f"load tokenizer\nvocab size: {vocab_size}\nspecial tokens: {protein_tokenizer.all_special_tokens}")

load tokenizer
vocab size: 10261
special tokens: ['<s>', '</s>', '<unk>', '<pad>', '<mask>']


In [2]:
import os
import pickle

if not os.path.exists("data/target/X.pkl"):
    from sklearn.model_selection import train_test_split
    
    with open("data/target/X_sampled.pkl", 'rb') as f:
        data = pickle.load(f)
    
        print(f"load dataset ... # of data: {len(data)}")
    
    X_train, X_test = train_test_split(data, test_size=0.1, random_state=42, shuffle=True)
    X_train, X_valid = train_test_split(X_train, test_size=0.005, random_state=42, shuffle=True)
    
    with open("data/target/X.pkl", "wb") as f:
        pickle.dump([X_train, X_valid, X_test], f)
else:
    with open("data/target/X.pkl", "rb") as f:
        X_train, X_valid, X_test = pickle.load(f)
        
print(f"load dataset\nX_train: {len(X_train)}\nX_valid: {len(X_valid)}\nX_test: {len(X_test)}")

load dataset
X_train: 31342500
X_valid: 157500
X_test: 3500000


In [3]:
import torch
import torch.nn as nn
from torch.utils.data import DataLoader, Dataset, RandomSampler
from transformers import DataCollatorForLanguageModeling

max_seq_len = 512

class MaskedLMDataset(Dataset):
    def __init__(self, data, tokenizer, max_length=512):
        self.data = data
        self.tokenizer = tokenizer
        self.max_length = max_length
        
        
    def encode(self, data):
        return self.tokenizer.encode(data, max_length=self.max_length, truncation=True)
        
        
    def __len__(self):
        return len(self.data)

    
    def __getitem__(self, idx):
        return torch.tensor(self.encode(self.data[idx]), dtype=torch.long)
    
    
data_collator_train = DataCollatorForLanguageModeling(
    tokenizer=protein_tokenizer, mlm=True, mlm_probability=0.2
)

data_collator_valid = DataCollatorForLanguageModeling(
    tokenizer=protein_tokenizer, mlm=True, mlm_probability=0.15
)

train_dataset = MaskedLMDataset(X_train, protein_tokenizer, max_length=max_seq_len)
train_sampler = RandomSampler(X_train, replacement=True, num_samples=100000)
train_dataloader = DataLoader(train_dataset, batch_size=64, collate_fn=data_collator_train, num_workers=16, pin_memory=True, prefetch_factor=10, drop_last=True, sampler=train_sampler)

valid_dataset = MaskedLMDataset(X_valid, protein_tokenizer, max_length=max_seq_len)
valid_sampler = RandomSampler(X_valid, replacement=True, num_samples=20000)
valid_dataloader = DataLoader(valid_dataset, batch_size=64, collate_fn=data_collator_valid, num_workers=16, pin_memory=True, prefetch_factor=10, sampler=valid_sampler)

test_dataset = MaskedLMDataset(X_test, protein_tokenizer, max_length=max_seq_len)
test_dataloader = DataLoader(test_dataset, batch_size=64, collate_fn=data_collator_valid, num_workers=16, pin_memory=True, prefetch_factor=10)


In [12]:
import torchmetrics
import pytorch_lightning as pl
from transformers import RobertaConfig, RobertaForMaskedLM
from pytorch_lightning.callbacks import ModelCheckpoint, EarlyStopping


config = RobertaConfig(
    vocab_size=vocab_size,
    hidden_size=512,
    num_hidden_layers=12,
    num_attention_heads=8,
    intermediate_size=2048,
    hidden_act="gelu",
    hidden_dropout_prob=0.1,
    attention_probs_dropout_prob=0.1,
    max_position_embeddings=max_seq_len + 2,
    type_vocab_size=1,
    position_embedding_type="absolute"
)


class Bert(pl.LightningModule):
    def __init__(self, config):
        super().__init__()
        self.save_hyperparameters()
        self.model = RobertaForMaskedLM(config)
        
        self.train_accuracy = torchmetrics.Accuracy()
        self.valid_accuracy = torchmetrics.Accuracy()
        self.test_accuracy = torchmetrics.Accuracy()
        
        
    def forward(self, input_ids, labels):
        return self.model(input_ids=input_ids, labels=labels)

       
    def training_step(self, batch, batch_idx):
        input_ids = batch['input_ids']
        labels = batch['labels']
        
        output = self(input_ids, labels)

        loss = output.loss
        logits = output.logits

        preds = logits.argmax(dim=-1)
        
        self.log('train_loss', float(loss), on_step=True, on_epoch=True, prog_bar=True)
        self.log("train_accuracy", self.train_accuracy(preds[labels > 0], labels[labels > 0]), on_step=False, on_epoch=True, prog_bar=True, logger=True)
        
        return loss

    
    def validation_step(self, batch, batch_idx):
        input_ids = batch['input_ids']
        labels = batch['labels']
        
        output = self(input_ids, labels)

        loss = output.loss
        logits = output.logits

        preds = logits.argmax(dim=-1)
        
        self.log('valid_loss', float(loss), on_step=False, on_epoch=True, prog_bar=True)
        self.log("valid_accuracy", self.valid_accuracy(preds[labels > 0], labels[labels > 0]), on_step=False, on_epoch=True, prog_bar=True, logger=True)
    
    
    def test_step(self, batch, batch_idx):
        input_ids = batch['input_ids']
        labels = batch['labels']
        
        output = self(input_ids, labels)

        loss = output.loss
        logits = output.logits

        preds = logits.argmax(dim=-1)
        
        self.log('test_loss', float(loss), on_step=False, on_epoch=True, prog_bar=True)
        self.log("test_accuracy", self.test_accuracy(preds[labels > 0], labels[labels > 0]), on_step=False, on_epoch=True, prog_bar=True, logger=True)
    
    
    def configure_optimizers(self):
        optimizer = torch.optim.AdamW(self.parameters(), lr=1e-3)
        scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=200)
    
        return {"optimizer": optimizer, "lr_scheduler": scheduler}
    
    
model = Bert(config)
callbacks = [
    ModelCheckpoint(monitor='valid_loss', save_top_k=30, dirpath='weights/protein_bert_pretraining_masking_rate_30', filename='protein_bert-{epoch:03d}-{valid_loss:.4f}-{valid_accuracy:.4f}'),
]

trainer = pl.Trainer(max_epochs=1000, gpus=1, enable_progress_bar=True, callbacks=callbacks, precision=16)

Using 16bit native Automatic Mixed Precision (AMP)
GPU available: True, used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs


In [None]:
trainer.fit(model, train_dataloader, valid_dataloader)

LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name           | Type               | Params
------------------------------------------------------
0 | model          | RobertaForMaskedLM | 43.6 M
1 | train_accuracy | Accuracy           | 0     
2 | valid_accuracy | Accuracy           | 0     
3 | test_accuracy  | Accuracy           | 0     
------------------------------------------------------
43.6 M    Trainable params
0         Non-trainable params
43.6 M    Total params
87.242    Total estimated model params size (MB)


Validation sanity check: 0it [00:00, ?it/s]

Training: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Exception ignored in: <function _MultiProcessingDataLoaderIter.__del__ at 0x7f3a61d51ee0>
Traceback (most recent call last):
  File "/home/ubuntu/anaconda3/envs/pytorch/lib/python3.8/site-packages/torch/utils/data/dataloader.py", line 1328, in __del__
    self._shutdown_workers()
  File "/home/ubuntu/anaconda3/envs/pytorch/lib/python3.8/site-packages/torch/utils/data/dataloader.py", line 1320, in _shutdown_workers
    if w.is_alive():
  File "/home/ubuntu/anaconda3/envs/pytorch/lib/python3.8/multiprocessing/process.py", line 160, in is_alive
    assert self._parent_pid == os.getpid(), 'can only test a child process'
AssertionError: can only test a child process
Exception ignored in: <function _MultiProcessingDataLoaderIter.__del__ at 0x7f3a61d51ee0>
Traceback (most recent call last):
  File "/home/ubuntu/anaconda3/envs/pytorch/lib/python3.8/site-packages/torch/utils/data/dataloader.py", line 1328, in __del__
    self._shutdown_workers()
  File "/home/ubuntu/anaconda3/envs/pytorch/lib/p

    self._shutdown_workers()
  File "/home/ubuntu/anaconda3/envs/pytorch/lib/python3.8/site-packages/torch/utils/data/dataloader.py", line 1320, in _shutdown_workers
    if w.is_alive():
  File "/home/ubuntu/anaconda3/envs/pytorch/lib/python3.8/multiprocessing/process.py", line 160, in is_alive
    assert self._parent_pid == os.getpid(), 'can only test a child process'
AssertionError: can only test a child process
Exception ignored in: <function _MultiProcessingDataLoaderIter.__del__ at 0x7f3a61d51ee0>
Traceback (most recent call last):
  File "/home/ubuntu/anaconda3/envs/pytorch/lib/python3.8/site-packages/torch/utils/data/dataloader.py", line 1328, in __del__
    self._shutdown_workers()
  File "/home/ubuntu/anaconda3/envs/pytorch/lib/python3.8/site-packages/torch/utils/data/dataloader.py", line 1320, in _shutdown_workers
    if w.is_alive():
  File "/home/ubuntu/anaconda3/envs/pytorch/lib/python3.8/multiprocessing/process.py", line 160, in is_alive
    assert self._parent_pid == os.

    if w.is_alive():
  File "/home/ubuntu/anaconda3/envs/pytorch/lib/python3.8/multiprocessing/process.py", line 160, in is_alive
    assert self._parent_pid == os.getpid(), 'can only test a child process'
AssertionError: can only test a child process
Exception ignored in: <function _MultiProcessingDataLoaderIter.__del__ at 0x7f3a61d51ee0>
Traceback (most recent call last):
  File "/home/ubuntu/anaconda3/envs/pytorch/lib/python3.8/site-packages/torch/utils/data/dataloader.py", line 1328, in __del__
    self._shutdown_workers()
  File "/home/ubuntu/anaconda3/envs/pytorch/lib/python3.8/site-packages/torch/utils/data/dataloader.py", line 1320, in _shutdown_workers
    if w.is_alive():
  File "/home/ubuntu/anaconda3/envs/pytorch/lib/python3.8/multiprocessing/process.py", line 160, in is_alive
    assert self._parent_pid == os.getpid(), 'can only test a child process'
AssertionError: can only test a child process
Exception ignored in: <function _MultiProcessingDataLoaderIter.__del__ at 0x7f

Validating: 0it [00:00, ?it/s]

Exception ignored in: <function _MultiProcessingDataLoaderIter.__del__ at 0x7f3a61d51ee0>
Traceback (most recent call last):
  File "/home/ubuntu/anaconda3/envs/pytorch/lib/python3.8/site-packages/torch/utils/data/dataloader.py", line 1328, in __del__
    self._shutdown_workers()
  File "/home/ubuntu/anaconda3/envs/pytorch/lib/python3.8/site-packages/torch/utils/data/dataloader.py", line 1320, in _shutdown_workers
    if w.is_alive():
  File "/home/ubuntu/anaconda3/envs/pytorch/lib/python3.8/multiprocessing/process.py", line 160, in is_alive
    assert self._parent_pid == os.getpid(), 'can only test a child process'
AssertionError: can only test a child process
Exception ignored in: <function _MultiProcessingDataLoaderIter.__del__ at 0x7f3a61d51ee0>
Traceback (most recent call last):
  File "/home/ubuntu/anaconda3/envs/pytorch/lib/python3.8/site-packages/torch/utils/data/dataloader.py", line 1328, in __del__
    self._shutdown_workers()
  File "/home/ubuntu/anaconda3/envs/pytorch/lib/p

    self._shutdown_workers()
  File "/home/ubuntu/anaconda3/envs/pytorch/lib/python3.8/site-packages/torch/utils/data/dataloader.py", line 1320, in _shutdown_workers
    if w.is_alive():
  File "/home/ubuntu/anaconda3/envs/pytorch/lib/python3.8/multiprocessing/process.py", line 160, in is_alive
    assert self._parent_pid == os.getpid(), 'can only test a child process'
AssertionError: can only test a child process
Exception ignored in: <function _MultiProcessingDataLoaderIter.__del__ at 0x7f3a61d51ee0>
Traceback (most recent call last):
  File "/home/ubuntu/anaconda3/envs/pytorch/lib/python3.8/site-packages/torch/utils/data/dataloader.py", line 1328, in __del__
    self._shutdown_workers()
  File "/home/ubuntu/anaconda3/envs/pytorch/lib/python3.8/site-packages/torch/utils/data/dataloader.py", line 1320, in _shutdown_workers
    if w.is_alive():
  File "/home/ubuntu/anaconda3/envs/pytorch/lib/python3.8/multiprocessing/process.py", line 160, in is_alive
    assert self._parent_pid == os.

    if w.is_alive():
  File "/home/ubuntu/anaconda3/envs/pytorch/lib/python3.8/multiprocessing/process.py", line 160, in is_alive
    assert self._parent_pid == os.getpid(), 'can only test a child process'
AssertionError: can only test a child process
Exception ignored in: <function _MultiProcessingDataLoaderIter.__del__ at 0x7f3a61d51ee0>
Traceback (most recent call last):
  File "/home/ubuntu/anaconda3/envs/pytorch/lib/python3.8/site-packages/torch/utils/data/dataloader.py", line 1328, in __del__
    self._shutdown_workers()
  File "/home/ubuntu/anaconda3/envs/pytorch/lib/python3.8/site-packages/torch/utils/data/dataloader.py", line 1320, in _shutdown_workers
    if w.is_alive():
  File "/home/ubuntu/anaconda3/envs/pytorch/lib/python3.8/multiprocessing/process.py", line 160, in is_alive
    assert self._parent_pid == os.getpid(), 'can only test a child process'
AssertionError: can only test a child process
Exception ignored in: <function _MultiProcessingDataLoaderIter.__del__ at 0x7f

    assert self._parent_pid == os.getpid(), 'can only test a child process'
AssertionError: can only test a child process
Exception ignored in: <function _MultiProcessingDataLoaderIter.__del__ at 0x7f3a61d51ee0>
Traceback (most recent call last):
  File "/home/ubuntu/anaconda3/envs/pytorch/lib/python3.8/site-packages/torch/utils/data/dataloader.py", line 1328, in __del__
    self._shutdown_workers()
  File "/home/ubuntu/anaconda3/envs/pytorch/lib/python3.8/site-packages/torch/utils/data/dataloader.py", line 1320, in _shutdown_workers
    if w.is_alive():
  File "/home/ubuntu/anaconda3/envs/pytorch/lib/python3.8/multiprocessing/process.py", line 160, in is_alive
    assert self._parent_pid == os.getpid(), 'can only test a child process'
AssertionError: can only test a child process
Exception ignored in: <function _MultiProcessingDataLoaderIter.__del__ at 0x7f3a61d51ee0>
Traceback (most recent call last):
  File "/home/ubuntu/anaconda3/envs/pytorch/lib/python3.8/site-packages/torch/utils

Exception ignored in: <function _MultiProcessingDataLoaderIter.__del__ at 0x7f3a61d51ee0>
Traceback (most recent call last):
      File "/home/ubuntu/anaconda3/envs/pytorch/lib/python3.8/site-packages/torch/utils/data/dataloader.py", line 1328, in __del__
self._shutdown_workers()
  File "/home/ubuntu/anaconda3/envs/pytorch/lib/python3.8/site-packages/torch/utils/data/dataloader.py", line 1320, in _shutdown_workers
    if w.is_alive():
  File "/home/ubuntu/anaconda3/envs/pytorch/lib/python3.8/multiprocessing/process.py", line 160, in is_alive
    assert self._parent_pid == os.getpid(), 'can only test a child process'
AssertionError: can only test a child process
Exception ignored in: <function _MultiProcessingDataLoaderIter.__del__ at 0x7f3a61d51ee0>
Traceback (most recent call last):
  File "/home/ubuntu/anaconda3/envs/pytorch/lib/python3.8/site-packages/torch/utils/data/dataloader.py", line 1328, in __del__
    self._shutdown_workers()
  File "/home/ubuntu/anaconda3/envs/pytorch/lib/p

Validating: 0it [00:00, ?it/s]

Exception ignored in: <function _MultiProcessingDataLoaderIter.__del__ at 0x7f3a61d51ee0>
Traceback (most recent call last):
  File "/home/ubuntu/anaconda3/envs/pytorch/lib/python3.8/site-packages/torch/utils/data/dataloader.py", line 1328, in __del__
    self._shutdown_workers()
  File "/home/ubuntu/anaconda3/envs/pytorch/lib/python3.8/site-packages/torch/utils/data/dataloader.py", line 1320, in _shutdown_workers
    if w.is_alive():
  File "/home/ubuntu/anaconda3/envs/pytorch/lib/python3.8/multiprocessing/process.py", line 160, in is_alive
    assert self._parent_pid == os.getpid(), 'can only test a child process'
AssertionError: can only test a child process
Exception ignored in: <function _MultiProcessingDataLoaderIter.__del__ at 0x7f3a61d51ee0>
Traceback (most recent call last):
  File "/home/ubuntu/anaconda3/envs/pytorch/lib/python3.8/site-packages/torch/utils/data/dataloader.py", line 1328, in __del__
    self._shutdown_workers()
  File "/home/ubuntu/anaconda3/envs/pytorch/lib/p

    self._shutdown_workers()
  File "/home/ubuntu/anaconda3/envs/pytorch/lib/python3.8/site-packages/torch/utils/data/dataloader.py", line 1320, in _shutdown_workers
    if w.is_alive():
  File "/home/ubuntu/anaconda3/envs/pytorch/lib/python3.8/multiprocessing/process.py", line 160, in is_alive
    assert self._parent_pid == os.getpid(), 'can only test a child process'
AssertionError: can only test a child process
Exception ignored in: <function _MultiProcessingDataLoaderIter.__del__ at 0x7f3a61d51ee0>
Traceback (most recent call last):
  File "/home/ubuntu/anaconda3/envs/pytorch/lib/python3.8/site-packages/torch/utils/data/dataloader.py", line 1328, in __del__
    self._shutdown_workers()
  File "/home/ubuntu/anaconda3/envs/pytorch/lib/python3.8/site-packages/torch/utils/data/dataloader.py", line 1320, in _shutdown_workers
    if w.is_alive():
  File "/home/ubuntu/anaconda3/envs/pytorch/lib/python3.8/multiprocessing/process.py", line 160, in is_alive
    assert self._parent_pid == os.

    if w.is_alive():
  File "/home/ubuntu/anaconda3/envs/pytorch/lib/python3.8/multiprocessing/process.py", line 160, in is_alive
    assert self._parent_pid == os.getpid(), 'can only test a child process'
AssertionError: can only test a child process
Exception ignored in: <function _MultiProcessingDataLoaderIter.__del__ at 0x7f3a61d51ee0>
Traceback (most recent call last):
  File "/home/ubuntu/anaconda3/envs/pytorch/lib/python3.8/site-packages/torch/utils/data/dataloader.py", line 1328, in __del__
    self._shutdown_workers()
  File "/home/ubuntu/anaconda3/envs/pytorch/lib/python3.8/site-packages/torch/utils/data/dataloader.py", line 1320, in _shutdown_workers
    if w.is_alive():
  File "/home/ubuntu/anaconda3/envs/pytorch/lib/python3.8/multiprocessing/process.py", line 160, in is_alive
    assert self._parent_pid == os.getpid(), 'can only test a child process'
AssertionError: can only test a child process
Exception ignored in: <function _MultiProcessingDataLoaderIter.__del__ at 0x7f

    assert self._parent_pid == os.getpid(), 'can only test a child process'
AssertionError: can only test a child process
Exception ignored in: <function _MultiProcessingDataLoaderIter.__del__ at 0x7f3a61d51ee0>
Traceback (most recent call last):
  File "/home/ubuntu/anaconda3/envs/pytorch/lib/python3.8/site-packages/torch/utils/data/dataloader.py", line 1328, in __del__
    self._shutdown_workers()
  File "/home/ubuntu/anaconda3/envs/pytorch/lib/python3.8/site-packages/torch/utils/data/dataloader.py", line 1320, in _shutdown_workers
    if w.is_alive():
  File "/home/ubuntu/anaconda3/envs/pytorch/lib/python3.8/multiprocessing/process.py", line 160, in is_alive
    assert self._parent_pid == os.getpid(), 'can only test a child process'
AssertionError: can only test a child process
Exception ignored in: <function _MultiProcessingDataLoaderIter.__del__ at 0x7f3a61d51ee0>
Traceback (most recent call last):
  File "/home/ubuntu/anaconda3/envs/pytorch/lib/python3.8/site-packages/torch/utils

Exception ignored in: <function _MultiProcessingDataLoaderIter.__del__ at 0x7f3a61d51ee0>
Traceback (most recent call last):
  File "/home/ubuntu/anaconda3/envs/pytorch/lib/python3.8/site-packages/torch/utils/data/dataloader.py", line 1328, in __del__
    self._shutdown_workers()
  File "/home/ubuntu/anaconda3/envs/pytorch/lib/python3.8/site-packages/torch/utils/data/dataloader.py", line 1320, in _shutdown_workers
    if w.is_alive():
  File "/home/ubuntu/anaconda3/envs/pytorch/lib/python3.8/multiprocessing/process.py", line 160, in is_alive
    assert self._parent_pid == os.getpid(), 'can only test a child process'
AssertionError: can only test a child process
Exception ignored in: <function _MultiProcessingDataLoaderIter.__del__ at 0x7f3a61d51ee0>
Traceback (most recent call last):
  File "/home/ubuntu/anaconda3/envs/pytorch/lib/python3.8/site-packages/torch/utils/data/dataloader.py", line 1328, in __del__
    self._shutdown_workers()
  File "/home/ubuntu/anaconda3/envs/pytorch/lib/p

    self._shutdown_workers()
  File "/home/ubuntu/anaconda3/envs/pytorch/lib/python3.8/site-packages/torch/utils/data/dataloader.py", line 1320, in _shutdown_workers
    if w.is_alive():
  File "/home/ubuntu/anaconda3/envs/pytorch/lib/python3.8/multiprocessing/process.py", line 160, in is_alive
    assert self._parent_pid == os.getpid(), 'can only test a child process'
AssertionError: can only test a child process
Exception ignored in: <function _MultiProcessingDataLoaderIter.__del__ at 0x7f3a61d51ee0>
Traceback (most recent call last):
  File "/home/ubuntu/anaconda3/envs/pytorch/lib/python3.8/site-packages/torch/utils/data/dataloader.py", line 1328, in __del__
    self._shutdown_workers()
  File "/home/ubuntu/anaconda3/envs/pytorch/lib/python3.8/site-packages/torch/utils/data/dataloader.py", line 1320, in _shutdown_workers
    if w.is_alive():
  File "/home/ubuntu/anaconda3/envs/pytorch/lib/python3.8/multiprocessing/process.py", line 160, in is_alive
    assert self._parent_pid == os.

    if w.is_alive():
  File "/home/ubuntu/anaconda3/envs/pytorch/lib/python3.8/multiprocessing/process.py", line 160, in is_alive
    assert self._parent_pid == os.getpid(), 'can only test a child process'
AssertionError: can only test a child process
Exception ignored in: <function _MultiProcessingDataLoaderIter.__del__ at 0x7f3a61d51ee0>
Traceback (most recent call last):
  File "/home/ubuntu/anaconda3/envs/pytorch/lib/python3.8/site-packages/torch/utils/data/dataloader.py", line 1328, in __del__
    self._shutdown_workers()
  File "/home/ubuntu/anaconda3/envs/pytorch/lib/python3.8/site-packages/torch/utils/data/dataloader.py", line 1320, in _shutdown_workers
    if w.is_alive():
  File "/home/ubuntu/anaconda3/envs/pytorch/lib/python3.8/multiprocessing/process.py", line 160, in is_alive
    assert self._parent_pid == os.getpid(), 'can only test a child process'
AssertionError: can only test a child process
Exception ignored in: <function _MultiProcessingDataLoaderIter.__del__ at 0x7f

Validating: 0it [00:00, ?it/s]

Exception ignored in: <function _MultiProcessingDataLoaderIter.__del__ at 0x7f3a61d51ee0>
Traceback (most recent call last):
Exception ignored in:   File "/home/ubuntu/anaconda3/envs/pytorch/lib/python3.8/site-packages/torch/utils/data/dataloader.py", line 1328, in __del__
<function _MultiProcessingDataLoaderIter.__del__ at 0x7f3a61d51ee0>    
Traceback (most recent call last):
  File "/home/ubuntu/anaconda3/envs/pytorch/lib/python3.8/site-packages/torch/utils/data/dataloader.py", line 1328, in __del__
self._shutdown_workers()    
  File "/home/ubuntu/anaconda3/envs/pytorch/lib/python3.8/site-packages/torch/utils/data/dataloader.py", line 1320, in _shutdown_workers
    if w.is_alive():
  File "/home/ubuntu/anaconda3/envs/pytorch/lib/python3.8/multiprocessing/process.py", line 160, in is_alive
    assert self._parent_pid == os.getpid(), 'can only test a child process'
self._shutdown_workers()
  File "/home/ubuntu/anaconda3/envs/pytorch/lib/python3.8/site-packages/torch/utils/data/datalo

    self._shutdown_workers()
  File "/home/ubuntu/anaconda3/envs/pytorch/lib/python3.8/site-packages/torch/utils/data/dataloader.py", line 1320, in _shutdown_workers
    if w.is_alive():
  File "/home/ubuntu/anaconda3/envs/pytorch/lib/python3.8/multiprocessing/process.py", line 160, in is_alive
    assert self._parent_pid == os.getpid(), 'can only test a child process'
AssertionError: can only test a child process
Exception ignored in: <function _MultiProcessingDataLoaderIter.__del__ at 0x7f3a61d51ee0>
Traceback (most recent call last):
  File "/home/ubuntu/anaconda3/envs/pytorch/lib/python3.8/site-packages/torch/utils/data/dataloader.py", line 1328, in __del__
    self._shutdown_workers()
  File "/home/ubuntu/anaconda3/envs/pytorch/lib/python3.8/site-packages/torch/utils/data/dataloader.py", line 1320, in _shutdown_workers
    if w.is_alive():
  File "/home/ubuntu/anaconda3/envs/pytorch/lib/python3.8/multiprocessing/process.py", line 160, in is_alive
    assert self._parent_pid == os.

    if w.is_alive():
  File "/home/ubuntu/anaconda3/envs/pytorch/lib/python3.8/multiprocessing/process.py", line 160, in is_alive
    assert self._parent_pid == os.getpid(), 'can only test a child process'
AssertionError: can only test a child process
Exception ignored in: <function _MultiProcessingDataLoaderIter.__del__ at 0x7f3a61d51ee0>
Traceback (most recent call last):
  File "/home/ubuntu/anaconda3/envs/pytorch/lib/python3.8/site-packages/torch/utils/data/dataloader.py", line 1328, in __del__
    self._shutdown_workers()
  File "/home/ubuntu/anaconda3/envs/pytorch/lib/python3.8/site-packages/torch/utils/data/dataloader.py", line 1320, in _shutdown_workers
    if w.is_alive():
  File "/home/ubuntu/anaconda3/envs/pytorch/lib/python3.8/multiprocessing/process.py", line 160, in is_alive
    assert self._parent_pid == os.getpid(), 'can only test a child process'
AssertionError: can only test a child process
Exception ignored in: <function _MultiProcessingDataLoaderIter.__del__ at 0x7f

    assert self._parent_pid == os.getpid(), 'can only test a child process'
AssertionError: can only test a child process
Exception ignored in: <function _MultiProcessingDataLoaderIter.__del__ at 0x7f3a61d51ee0>
Traceback (most recent call last):
  File "/home/ubuntu/anaconda3/envs/pytorch/lib/python3.8/site-packages/torch/utils/data/dataloader.py", line 1328, in __del__
    self._shutdown_workers()
  File "/home/ubuntu/anaconda3/envs/pytorch/lib/python3.8/site-packages/torch/utils/data/dataloader.py", line 1320, in _shutdown_workers
    if w.is_alive():
  File "/home/ubuntu/anaconda3/envs/pytorch/lib/python3.8/multiprocessing/process.py", line 160, in is_alive
    assert self._parent_pid == os.getpid(), 'can only test a child process'
AssertionError: can only test a child process
Exception ignored in: <function _MultiProcessingDataLoaderIter.__del__ at 0x7f3a61d51ee0>
Traceback (most recent call last):
  File "/home/ubuntu/anaconda3/envs/pytorch/lib/python3.8/site-packages/torch/utils

Exception ignored in: <function _MultiProcessingDataLoaderIter.__del__ at 0x7f3a61d51ee0>
Traceback (most recent call last):
  File "/home/ubuntu/anaconda3/envs/pytorch/lib/python3.8/site-packages/torch/utils/data/dataloader.py", line 1328, in __del__
    self._shutdown_workers()
  File "/home/ubuntu/anaconda3/envs/pytorch/lib/python3.8/site-packages/torch/utils/data/dataloader.py", line 1320, in _shutdown_workers
    if w.is_alive():
  File "/home/ubuntu/anaconda3/envs/pytorch/lib/python3.8/multiprocessing/process.py", line 160, in is_alive
    assert self._parent_pid == os.getpid(), 'can only test a child process'
AssertionError: can only test a child process
Exception ignored in: <function _MultiProcessingDataLoaderIter.__del__ at 0x7f3a61d51ee0>
Traceback (most recent call last):
  File "/home/ubuntu/anaconda3/envs/pytorch/lib/python3.8/site-packages/torch/utils/data/dataloader.py", line 1328, in __del__
    self._shutdown_workers()
  File "/home/ubuntu/anaconda3/envs/pytorch/lib/p

    self._shutdown_workers()
  File "/home/ubuntu/anaconda3/envs/pytorch/lib/python3.8/site-packages/torch/utils/data/dataloader.py", line 1320, in _shutdown_workers
    if w.is_alive():
  File "/home/ubuntu/anaconda3/envs/pytorch/lib/python3.8/multiprocessing/process.py", line 160, in is_alive
    assert self._parent_pid == os.getpid(), 'can only test a child process'
AssertionError: can only test a child process
: can only test a child process
Exception ignored in: <function _MultiProcessingDataLoaderIter.__del__ at 0x7f3a61d51ee0>
Traceback (most recent call last):
Exception ignored in: <function _MultiProcessingDataLoaderIter.__del__ at 0x7f3a61d51ee0>
Traceback (most recent call last):
  File "/home/ubuntu/anaconda3/envs/pytorch/lib/python3.8/site-packages/torch/utils/data/dataloader.py", line 1328, in __del__
      File "/home/ubuntu/anaconda3/envs/pytorch/lib/python3.8/site-packages/torch/utils/data/dataloader.py", line 1328, in __del__
self._shutdown_workers()
      File "/home

    if w.is_alive():
  File "/home/ubuntu/anaconda3/envs/pytorch/lib/python3.8/multiprocessing/process.py", line 160, in is_alive
    assert self._parent_pid == os.getpid(), 'can only test a child process'
AssertionError: can only test a child process
Exception ignored in: <function _MultiProcessingDataLoaderIter.__del__ at 0x7f3a61d51ee0>
Traceback (most recent call last):
  File "/home/ubuntu/anaconda3/envs/pytorch/lib/python3.8/site-packages/torch/utils/data/dataloader.py", line 1328, in __del__
    self._shutdown_workers()
  File "/home/ubuntu/anaconda3/envs/pytorch/lib/python3.8/site-packages/torch/utils/data/dataloader.py", line 1320, in _shutdown_workers
    if w.is_alive():
  File "/home/ubuntu/anaconda3/envs/pytorch/lib/python3.8/multiprocessing/process.py", line 160, in is_alive
    assert self._parent_pid == os.getpid(), 'can only test a child process'
AssertionError: can only test a child process
Exception ignored in: <function _MultiProcessingDataLoaderIter.__del__ at 0x7f

    assert self._parent_pid == os.getpid(), 'can only test a child process'
AssertionError: can only test a child process
Exception ignored in: <function _MultiProcessingDataLoaderIter.__del__ at 0x7f3a61d51ee0>
Traceback (most recent call last):
  File "/home/ubuntu/anaconda3/envs/pytorch/lib/python3.8/site-packages/torch/utils/data/dataloader.py", line 1328, in __del__
    self._shutdown_workers()
  File "/home/ubuntu/anaconda3/envs/pytorch/lib/python3.8/site-packages/torch/utils/data/dataloader.py", line 1320, in _shutdown_workers
    if w.is_alive():
  File "/home/ubuntu/anaconda3/envs/pytorch/lib/python3.8/multiprocessing/process.py", line 160, in is_alive
    assert self._parent_pid == os.getpid(), 'can only test a child process'
AssertionError: can only test a child process
Exception ignored in: <function _MultiProcessingDataLoaderIter.__del__ at 0x7f3a61d51ee0>
Traceback (most recent call last):
  File "/home/ubuntu/anaconda3/envs/pytorch/lib/python3.8/site-packages/torch/utils

Exception ignored in: <function _MultiProcessingDataLoaderIter.__del__ at 0x7f3a61d51ee0>
  File "/home/ubuntu/anaconda3/envs/pytorch/lib/python3.8/site-packages/torch/utils/data/dataloader.py", line 1328, in __del__
self._shutdown_workers()Traceback (most recent call last):
    
  File "/home/ubuntu/anaconda3/envs/pytorch/lib/python3.8/site-packages/torch/utils/data/dataloader.py", line 1320, in _shutdown_workers
    if w.is_alive():
  File "/home/ubuntu/anaconda3/envs/pytorch/lib/python3.8/multiprocessing/process.py", line 160, in is_alive
    assert self._parent_pid == os.getpid(), 'can only test a child process'
AssertionError: can only test a child process
Exception ignored in: <function _MultiProcessingDataLoaderIter.__del__ at 0x7f3a61d51ee0>
Traceback (most recent call last):
  File "/home/ubuntu/anaconda3/envs/pytorch/lib/python3.8/site-packages/torch/utils/data/dataloader.py", line 1328, in __del__
    self._shutdown_workers()
  File "/home/ubuntu/anaconda3/envs/pytorch/lib/p

    self._shutdown_workers()
  File "/home/ubuntu/anaconda3/envs/pytorch/lib/python3.8/site-packages/torch/utils/data/dataloader.py", line 1320, in _shutdown_workers
    if w.is_alive():
  File "/home/ubuntu/anaconda3/envs/pytorch/lib/python3.8/multiprocessing/process.py", line 160, in is_alive
    assert self._parent_pid == os.getpid(), 'can only test a child process'
AssertionError: can only test a child process
Exception ignored in: <function _MultiProcessingDataLoaderIter.__del__ at 0x7f3a61d51ee0>
Traceback (most recent call last):
  File "/home/ubuntu/anaconda3/envs/pytorch/lib/python3.8/site-packages/torch/utils/data/dataloader.py", line 1328, in __del__
    self._shutdown_workers()
  File "/home/ubuntu/anaconda3/envs/pytorch/lib/python3.8/site-packages/torch/utils/data/dataloader.py", line 1320, in _shutdown_workers
    if w.is_alive():
  File "/home/ubuntu/anaconda3/envs/pytorch/lib/python3.8/multiprocessing/process.py", line 160, in is_alive
    assert self._parent_pid == os.

Validating: 0it [00:00, ?it/s]

Exception ignored in: <function _MultiProcessingDataLoaderIter.__del__ at 0x7f3a61d51ee0>
Traceback (most recent call last):
  File "/home/ubuntu/anaconda3/envs/pytorch/lib/python3.8/site-packages/torch/utils/data/dataloader.py", line 1328, in __del__
    self._shutdown_workers()
  File "/home/ubuntu/anaconda3/envs/pytorch/lib/python3.8/site-packages/torch/utils/data/dataloader.py", line 1320, in _shutdown_workers
    if w.is_alive():
  File "/home/ubuntu/anaconda3/envs/pytorch/lib/python3.8/multiprocessing/process.py", line 160, in is_alive
    assert self._parent_pid == os.getpid(), 'can only test a child process'
AssertionError: can only test a child process
Exception ignored in: <function _MultiProcessingDataLoaderIter.__del__ at 0x7f3a61d51ee0>
Traceback (most recent call last):
  File "/home/ubuntu/anaconda3/envs/pytorch/lib/python3.8/site-packages/torch/utils/data/dataloader.py", line 1328, in __del__
    self._shutdown_workers()
  File "/home/ubuntu/anaconda3/envs/pytorch/lib/p

    self._shutdown_workers()
  File "/home/ubuntu/anaconda3/envs/pytorch/lib/python3.8/site-packages/torch/utils/data/dataloader.py", line 1320, in _shutdown_workers
    if w.is_alive():
  File "/home/ubuntu/anaconda3/envs/pytorch/lib/python3.8/multiprocessing/process.py", line 160, in is_alive
    assert self._parent_pid == os.getpid(), 'can only test a child process'
AssertionError: can only test a child process
Exception ignored in: <function _MultiProcessingDataLoaderIter.__del__ at 0x7f3a61d51ee0>Exception ignored in: <function _MultiProcessingDataLoaderIter.__del__ at 0x7f3a61d51ee0>
Traceback (most recent call last):

Exception ignored in:   File "/home/ubuntu/anaconda3/envs/pytorch/lib/python3.8/site-packages/torch/utils/data/dataloader.py", line 1328, in __del__
Traceback (most recent call last):
<function _MultiProcessingDataLoaderIter.__del__ at 0x7f3a61d51ee0>  File "/home/ubuntu/anaconda3/envs/pytorch/lib/python3.8/site-packages/torch/utils/data/dataloader.py", line 1328, 

<function _MultiProcessingDataLoaderIter.__del__ at 0x7f3a61d51ee0>
Traceback (most recent call last):
  File "/home/ubuntu/anaconda3/envs/pytorch/lib/python3.8/site-packages/torch/utils/data/dataloader.py", line 1328, in __del__
    Exception ignored in: <function _MultiProcessingDataLoaderIter.__del__ at 0x7f3a61d51ee0>self._shutdown_workers()
Traceback (most recent call last):

  File "/home/ubuntu/anaconda3/envs/pytorch/lib/python3.8/site-packages/torch/utils/data/dataloader.py", line 1320, in _shutdown_workers
  File "/home/ubuntu/anaconda3/envs/pytorch/lib/python3.8/site-packages/torch/utils/data/dataloader.py", line 1328, in __del__
    if w.is_alive():    
  File "/home/ubuntu/anaconda3/envs/pytorch/lib/python3.8/multiprocessing/process.py", line 160, in is_alive
self._shutdown_workers()    
assert self._parent_pid == os.getpid(), 'can only test a child process'
AssertionError  File "/home/ubuntu/anaconda3/envs/pytorch/lib/python3.8/multiprocessing/process.py", line 160, in is_

<function _MultiProcessingDataLoaderIter.__del__ at 0x7f3a61d51ee0>
    if w.is_alive():Traceback (most recent call last):

  File "/home/ubuntu/anaconda3/envs/pytorch/lib/python3.8/multiprocessing/process.py", line 160, in is_alive
      File "/home/ubuntu/anaconda3/envs/pytorch/lib/python3.8/site-packages/torch/utils/data/dataloader.py", line 1328, in __del__
assert self._parent_pid == os.getpid(), 'can only test a child process'
AssertionError:     can only test a child process
self._shutdown_workers()
  File "/home/ubuntu/anaconda3/envs/pytorch/lib/python3.8/site-packages/torch/utils/data/dataloader.py", line 1320, in _shutdown_workers
    if w.is_alive():
  File "/home/ubuntu/anaconda3/envs/pytorch/lib/python3.8/multiprocessing/process.py", line 160, in is_alive
    assert self._parent_pid == os.getpid(), 'can only test a child process'
AssertionError: can only test a child process
Exception ignored in: <function _MultiProcessingDataLoaderIter.__del__ at 0x7f3a61d51ee0>
Traceback 

    assert self._parent_pid == os.getpid(), 'can only test a child process'
AssertionError: can only test a child process
Exception ignored in: Exception ignored in: <function _MultiProcessingDataLoaderIter.__del__ at 0x7f3a61d51ee0><function _MultiProcessingDataLoaderIter.__del__ at 0x7f3a61d51ee0>

Traceback (most recent call last):
  File "/home/ubuntu/anaconda3/envs/pytorch/lib/python3.8/site-packages/torch/utils/data/dataloader.py", line 1328, in __del__
Traceback (most recent call last):
  File "/home/ubuntu/anaconda3/envs/pytorch/lib/python3.8/site-packages/torch/utils/data/dataloader.py", line 1328, in __del__
        self._shutdown_workers()
  File "/home/ubuntu/anaconda3/envs/pytorch/lib/python3.8/site-packages/torch/utils/data/dataloader.py", line 1320, in _shutdown_workers
self._shutdown_workers()
  File "/home/ubuntu/anaconda3/envs/pytorch/lib/python3.8/site-packages/torch/utils/data/dataloader.py", line 1320, in _shutdown_workers
    if w.is_alive():
  File "/home/ubuntu/

Exception ignored in: <function _MultiProcessingDataLoaderIter.__del__ at 0x7f3a61d51ee0>Exception ignored in: <function _MultiProcessingDataLoaderIter.__del__ at 0x7f3a61d51ee0>
Traceback (most recent call last):
  File "/home/ubuntu/anaconda3/envs/pytorch/lib/python3.8/site-packages/torch/utils/data/dataloader.py", line 1328, in __del__
    self._shutdown_workers()
  File "/home/ubuntu/anaconda3/envs/pytorch/lib/python3.8/site-packages/torch/utils/data/dataloader.py", line 1320, in _shutdown_workers
    if w.is_alive():

  File "/home/ubuntu/anaconda3/envs/pytorch/lib/python3.8/multiprocessing/process.py", line 160, in is_alive
Exception ignored in: Traceback (most recent call last):
    assert self._parent_pid == os.getpid(), 'can only test a child process'
AssertionError<function _MultiProcessingDataLoaderIter.__del__ at 0x7f3a61d51ee0>: can only test a child process
  File "/home/ubuntu/anaconda3/envs/pytorch/lib/python3.8/site-packages/torch/utils/data/dataloader.py", line 1328, 

    Exception ignored in: <function _MultiProcessingDataLoaderIter.__del__ at 0x7f3a61d51ee0>self._shutdown_workers()

Traceback (most recent call last):
  File "/home/ubuntu/anaconda3/envs/pytorch/lib/python3.8/site-packages/torch/utils/data/dataloader.py", line 1328, in __del__
  File "/home/ubuntu/anaconda3/envs/pytorch/lib/python3.8/site-packages/torch/utils/data/dataloader.py", line 1320, in _shutdown_workers
        if w.is_alive():self._shutdown_workers()

  File "/home/ubuntu/anaconda3/envs/pytorch/lib/python3.8/site-packages/torch/utils/data/dataloader.py", line 1320, in _shutdown_workers
  File "/home/ubuntu/anaconda3/envs/pytorch/lib/python3.8/multiprocessing/process.py", line 160, in is_alive
    assert self._parent_pid == os.getpid(), 'can only test a child process'
AssertionError: can only test a child process
    if w.is_alive():
  File "/home/ubuntu/anaconda3/envs/pytorch/lib/python3.8/multiprocessing/process.py", line 160, in is_alive
    assert self._parent_pid == os.

AssertionError: can only test a child process
Exception ignored in: <function _MultiProcessingDataLoaderIter.__del__ at 0x7f3a61d51ee0>
Traceback (most recent call last):
  File "/home/ubuntu/anaconda3/envs/pytorch/lib/python3.8/site-packages/torch/utils/data/dataloader.py", line 1328, in __del__
  File "/home/ubuntu/anaconda3/envs/pytorch/lib/python3.8/multiprocessing/process.py", line 160, in is_alive
        self._shutdown_workers()assert self._parent_pid == os.getpid(), 'can only test a child process'

AssertionError: can only test a child process  File "/home/ubuntu/anaconda3/envs/pytorch/lib/python3.8/site-packages/torch/utils/data/dataloader.py", line 1320, in _shutdown_workers
    if w.is_alive():
  File "/home/ubuntu/anaconda3/envs/pytorch/lib/python3.8/multiprocessing/process.py", line 160, in is_alive
    assert self._parent_pid == os.getpid(), 'can only test a child process'

Exception ignored in: <function _MultiProcessingDataLoaderIter.__del__ at 0x7f3a61d51ee0>
Assertion

In [5]:
ckpt_fname = ""

model = Bert(config).load_from_checkpoint("weights/protein_bert_pretraining_masking_rate_30/" + ckpt_fname)
trainer.test(model, test_dataloader)

LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Testing: 0it [00:00, ?it/s]

--------------------------------------------------------------------------------
DATALOADER:0 TEST RESULTS
{'test_accuracy': 0.9606544375419617, 'test_loss': 0.10385298728942871}
--------------------------------------------------------------------------------


[{'test_loss': 0.10385298728942871, 'test_accuracy': 0.9606544375419617}]

In [11]:
model.model.base_model.save_pretrained("weights/protein_bert")