### Trying to run a RLHF training for BARTspectro
Try to run a PPO training on BARTspectro. Rewards are Fingerprint similarities  

#### Example code snippet from 
https://github.com/huggingface/trl

In [1]:
import os
os.environ["CUDA_DEVICE_ORDER"]="PCI_BUS_ID"   # see issue #152
os.environ["CUDA_VISIBLE_DEVICES"]="3"

# imports
import torch
from transformers import AutoTokenizer
from trl import PPOTrainer, PPOConfig, AutoModelForCausalLMWithValueHead, create_reference_model
from trl.core import respond_to_batch
from train_bart import build_tokenizer
from data_utils import build_single_datapipe
from data_utils import SpectroDataCollator, SpectroDataset
from tqdm import tqdm
from bart_spektro.ppo_spectro_trainer import PPOSpectroTrainer 
from data_utils import SpectroDataCollator
from bart_spektro.modeling_bart_spektro import BartSpektroForConditionalGeneration
from metrics import compute_cos_simils


%load_ext autoreload
%autoreload 2


[2023-11-01 17:04:04,165] [INFO] [real_accelerator.py:158:get_accelerator] Setting ds_accelerator to cuda (auto detect)


In [2]:
# get models
bart_model = BartSpektroForConditionalGeneration.from_pretrained('checkpoints/finetune/fresh-blaze-258_4_8M_rassp1_neims1_224kPretrain/checkpoint-73440/')
model = AutoModelForCausalLMWithValueHead.from_pretrained(bart_model)
model_ref = create_reference_model(model)

tokenizer = build_tokenizer("tokenizer/bbpe_tokenizer/bart_bbpe_1M_tokenizer.model")

In [3]:
# load data
train_pipe = build_single_datapipe("data/datasets/NIST/NIST_split_filip/train.jsonl", shuffle=True, buffer_size=1000)
valid_pipe = build_single_datapipe("data/datasets/NIST/NIST_split_filip/valid.jsonl", shuffle=False)

# initialize trainer
ppo_config = PPOConfig(
    batch_size=64,          
    forward_batch_size=None,  # not used
    backward_batch_size=64, # bs per one device futher split into mini_batch_size
    mini_batch_size=8,  # bs within backward_bs, actually used as bs in forward pass / backward pass (step of optimizer)
    is_encoder_decoder=True,
    log_with="wandb",
)

shuffling data/datasets/NIST/NIST_split_filip/train.jsonl with buffer_size=1000


In [4]:
trainer = PPOSpectroTrainer(
    model=model,
    config=ppo_config,
    dataset=train_pipe,
    tokenizer=tokenizer,
    data_collator=SpectroDataCollator(),
)

Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.
[34m[1mwandb[0m: Currently logged in as: [33mhajekad[0m ([33mmsgc_boys[0m). Use [1m`wandb login --relogin`[0m to force relogin


In [5]:
for x in trainer.dataloader:
    print(x)
    break

{'input_ids': tensor([[26, 27, 29,  ...,  2,  2,  2],
        [26, 27, 29,  ...,  2,  2,  2],
        [27, 28, 29,  ...,  2,  2,  2],
        ...,
        [33, 34, 36,  ...,  2,  2,  2],
        [33, 38, 39,  ...,  2,  2,  2],
        [14, 15, 18,  ...,  2,  2,  2]], device='cuda:0'), 'position_ids': tensor([[ 0,  3,  4,  ..., -1, -1, -1],
        [ 0,  0,  0,  ..., -1, -1, -1],
        [ 1,  0,  2,  ..., -1, -1, -1],
        ...,
        [ 0,  0,  0,  ..., -1, -1, -1],
        [ 0,  7,  7,  ..., -1, -1, -1],
        [ 0,  2,  0,  ..., -1, -1, -1]], device='cuda:0'), 'attention_mask': tensor([[1, 1, 1,  ..., 0, 0, 0],
        [1, 1, 1,  ..., 0, 0, 0],
        [1, 1, 1,  ..., 0, 0, 0],
        ...,
        [1, 1, 1,  ..., 0, 0, 0],
        [1, 1, 1,  ..., 0, 0, 0],
        [1, 1, 1,  ..., 0, 0, 0]], device='cuda:0'), 'decoder_attention_mask': tensor([[1, 1, 1,  ..., 0, 0, 0],
        [1, 1, 1,  ..., 0, 0, 0],
        [1, 1, 1,  ..., 0, 0, 0],
        ...,
        [1, 1, 1,  ..., 0, 0, 0

In [5]:
generation_kwargs = {"top_k": None,
              "top_p": None,
              "do_sample": True,
              "num_beams": 5,
              "temperature": None,
              "penalty_alpha": None,
              "num_return_sequences": 1,
              "length_penalty": 1.0}
# training loop

for epoch, batch in tqdm(enumerate(trainer.dataloader)):
    #### Get response from SFTModel
    preds = trainer.generate(batch, "cuda:0", **generation_kwargs) # add model specific inputs and generation kwargs

    #### Compute reward score
    preds_str = tokenizer.batch_decode(preds, skip_special_tokens=True)
    gts_str = [tokenizer.decode((label*mask).tolist(), skip_special_tokens=True) for label, mask in zip(batch["labels"], batch["decoder_attention_mask"])]
    smiles_simils, pred_mols, gt_mols = compute_cos_simils(preds_str, gts_str, return_mols=True)        
    
    scores = list(map(torch.tensor, smiles_simils))

    #### Run PPO step
    stats = trainer.step(batch, list(preds), scores)
    trainer.log_stats(stats, batch, scores)

#### Save model
trainer.save_model("my_ppo_model")

0it [00:58, ?it/s]


RuntimeError: CUDA out of memory. Tried to allocate 204.00 MiB (GPU 0; 79.15 GiB total capacity; 19.72 GiB already allocated; 146.94 MiB free; 20.62 GiB reserved in total by PyTorch) If reserved memory is >> allocated memory try setting max_split_size_mb to avoid fragmentation.  See documentation for Memory Management and PYTORCH_CUDA_ALLOC_CONF

In [32]:
trainer.is_encoder_decoder

True

In [22]:
model

AutoModelForCausalLMWithValueHead(
  (pretrained_model): BartSpektroForConditionalGeneration(
    (model): BartSpektroModel(
      (shared): Embedding(1240, 1024, padding_idx=2)
      (encoder): BartSpektroEncoder(
        (embed_tokens): Embedding(1240, 1024, padding_idx=2)
        (embed_positions): BartSpektroLearnedPositionalEmbedding(12, 1024)
        (layers): ModuleList(
          (0): BartEncoderLayer(
            (self_attn): BartAttention(
              (k_proj): Linear(in_features=1024, out_features=1024, bias=True)
              (v_proj): Linear(in_features=1024, out_features=1024, bias=True)
              (q_proj): Linear(in_features=1024, out_features=1024, bias=True)
              (out_proj): Linear(in_features=1024, out_features=1024, bias=True)
            )
            (self_attn_layer_norm): LayerNorm((1024,), eps=1e-05, elementwise_affine=True)
            (activation_fn): GELUActivation()
            (fc1): Linear(in_features=1024, out_features=4096, bias=True)
   

In [2]:
from trl.core import PPODecorators

In [35]:
model.pretrained_model.config.pad_token_id

2

In [32]:
a = torch.tensor([1,2,3,4,5,6,7,8])
(torch.isin(a, torch.tensor([0,1,2,3]), invert=True)).sum().item()

5

In [35]:
a.device

[autoreload of bart_spektro.ppo_spectro_trainer failed: Traceback (most recent call last):
  File "/home/xhajek9/.local/lib/python3.8/site-packages/IPython/extensions/autoreload.py", line 273, in check
    superreload(m, reload, self.old_objects)
  File "/home/xhajek9/.local/lib/python3.8/site-packages/IPython/extensions/autoreload.py", line 471, in superreload
    module = reload(module)
  File "/home/xhajek9/miniconda3/envs/BARTtrain/lib/python3.8/importlib/__init__.py", line 169, in reload
    _bootstrap._exec(spec, module)
  File "<frozen importlib._bootstrap>", line 604, in _exec
  File "<frozen importlib._bootstrap_external>", line 839, in exec_module
  File "<frozen importlib._bootstrap_external>", line 976, in get_code
  File "<frozen importlib._bootstrap_external>", line 906, in source_to_code
  File "<frozen importlib._bootstrap>", line 219, in _call_with_frames_removed
  File "/home/xhajek9/gc-ms_bart/bart_spektro/ppo_spectro_trainer.py", line 192
    response_lens = torch.t

device(type='cpu')