In [1]:
BASE_MODEL = 'google/gemma-3-270m-it'
DATASET_NAME = 'Dahoas/rm-static'
from transformers import AutoTokenizer , AutoModelForCausalLM
from datasets import load_dataset

from trl import DPOConfig , DPOTrainer
 
import os 

FILE_PATH = os.path.dirname(os.path.abspath(""))

import logging
logging.basicConfig(level = logging.CRITICAL)

import warnings
warnings.filterwarnings(action = 'ignore')


In [2]:
# preparing dataset 
train_dataset = load_dataset(DATASET_NAME , cache_dir = FILE_PATH, split='train[:5%]')


print(type(train_dataset))

# train_dataset = train_dataset[0:500] #only take first 500 examples 
# print(type(train_dataset)) 


<class 'datasets.arrow_dataset.Dataset'>


In [3]:
train_dataset[0]

{'prompt': '\n\nHuman: Can you describe the steps to clean fingerprints and smudges from a laptop screen\n\nAssistant: Yes, certainly. To clean your screen, you first need to use a microfiber cloth or soft, damp cloth to gently wipe down the surface of the screen. Next, you’ll want to grab a soft, lint-free, microfiber cleaning cloth and gently rub it back and forth across the screen to remove fingerprints and smudges.\n\nHuman: Can I spray isopropyl alcohol onto the cloth and clean it that way?\n\nAssistant:',
 'response': ' Yes, you can do that to help the cloth pick up even more dirt from the screen. Be sure to always use a clean, soft cloth, not a piece of scratchy, roughened, or textured material, and make sure it’s lint-free.',
 'chosen': ' Yes, you can do that to help the cloth pick up even more dirt from the screen. Be sure to always use a clean, soft cloth, not a piece of scratchy, roughened, or textured material, and make sure it’s lint-free.',
 'rejected': ' Yes, you can spr

In [4]:
# model and tokenizer 


policy_model = AutoModelForCausalLM.from_pretrained(BASE_MODEL, cache_dir = FILE_PATH, attn_implementation='eager')
tokenizer = AutoTokenizer.from_pretrained(BASE_MODEL , cache_dir = FILE_PATH)


In [5]:
# vizualize_tokenizer 
import json

cfg_path = os.path.join(FILE_PATH+'/Dahoas__rm-static/', "tokenizer_config.json")
print('cfg path', cfg_path)
if os.path.isfile(cfg_path):
    print("tokenizer_config.json:")
    print(json.dumps(json.load(open(cfg_path)), indent=2))

prompt = 'Hi Chat, How are you doing ?'
prompt2 = 'Hello world ! I am here'
out2 = tokenizer(prompt2)
out = tokenizer(prompt)

print(out)
print(out2)

print(tokenizer.bos_token_id, tokenizer.pad_token_id)
print(policy_model.config.bos_token_id, policy_model.config.pad_token_id)


if hasattr(policy_model, "generation_config") and policy_model.generation_config is not None:
    policy_model.generation_config.bos_token_id = tokenizer.bos_token_id
    policy_model.generation_config.pad_token_id = tokenizer.pad_token_id


print('\n\n---- TOKENIZER AND MODELS ----')
print("tokenizer vocab_size:", len(tokenizer))
print("model.config.vocab_size:", getattr(policy_model.config, "vocab_size", None))
print("embeddings rows:", policy_model.get_input_embeddings().weight.shape[0])

print("tokenizer special map:", tokenizer.special_tokens_map)
print("tokenizer ids: bos, pad, eos =", tokenizer.bos_token_id, tokenizer.pad_token_id, tokenizer.eos_token_id)
print("model ids:     bos, pad, eos =", policy_model.config.bos_token_id, policy_model.config.pad_token_id, policy_model.config.eos_token_id)
print("generation_config:", getattr(policy_model, "generation_config", None))


cfg path /Users/mohitdulani/Desktop/personal/rlhf/Dahoas__rm-static/tokenizer_config.json
{'input_ids': [2, 10979, 25380, 236764, 2088, 659, 611, 3490, 2360], 'attention_mask': [1, 1, 1, 1, 1, 1, 1, 1, 1]}
{'input_ids': [2, 9259, 1902, 1717, 564, 1006, 1590], 'attention_mask': [1, 1, 1, 1, 1, 1, 1]}
2 0
2 0


---- TOKENIZER AND MODELS ----
tokenizer vocab_size: 262145
model.config.vocab_size: 262144
embeddings rows: 262144
tokenizer special map: {'bos_token': '<bos>', 'eos_token': '<eos>', 'unk_token': '<unk>', 'pad_token': '<pad>', 'boi_token': '<start_of_image>', 'eoi_token': '<end_of_image>', 'image_token': '<image_soft_token>'}
tokenizer ids: bos, pad, eos = 2 0 1
model ids:     bos, pad, eos = 2 0 1
generation_config: GenerationConfig {
  "bos_token_id": 2,
  "cache_implementation": "hybrid",
  "do_sample": true,
  "eos_token_id": [
    1,
    106
  ],
  "pad_token_id": 0,
  "top_k": 64,
  "top_p": 0.95
}



model config just stores what were the bos , eos tokens used while training this model (not the actual token strings) so if you are planning to use another tokenizer set to finetune , match these up and if possible try to use the same tokenizer  

In [6]:
policy_model.generation_config.eos_token_id = ['1']
print("generation_config:", getattr(policy_model, "generation_config", None))


generation_config: GenerationConfig {
  "bos_token_id": 2,
  "cache_implementation": "hybrid",
  "do_sample": true,
  "eos_token_id": [
    "1"
  ],
  "pad_token_id": 0,
  "top_k": 64,
  "top_p": 0.95
}



In [7]:
# find the config file for the tokenizer 
# find the extra id & token
extra_id = len(tokenizer) - 1
extra_token = tokenizer.convert_ids_to_tokens(extra_id)
print("extra_id:", extra_id, "extra_token:", extra_token)

# check if it is an added token or a special token
print("is special token?", extra_token in tokenizer.all_special_tokens)
print("added tokens:", tokenizer.get_added_vocab())   # dict token->id for user-added tokens
print("appears in vocab?", extra_token in tokenizer.get_vocab())


extra_id: 262144 extra_token: <image_soft_token>
is special token? True
added tokens: {'<pad>': 0, '<eos>': 1, '<bos>': 2, '<unk>': 3, '<mask>': 4, '[multimodal]': 5, '<unused0>': 6, '<unused1>': 7, '<unused2>': 8, '<unused3>': 9, '<unused4>': 10, '<unused5>': 11, '<unused6>': 12, '<unused7>': 13, '<unused8>': 14, '<unused9>': 15, '<unused10>': 16, '<unused11>': 17, '<unused12>': 18, '<unused13>': 19, '<unused14>': 20, '<unused15>': 21, '<unused16>': 22, '<unused17>': 23, '<unused18>': 24, '<unused19>': 25, '<unused20>': 26, '<unused21>': 27, '<unused22>': 28, '<unused23>': 29, '<unused24>': 30, '<unused25>': 31, '<unused26>': 32, '<unused27>': 33, '<unused28>': 34, '<unused29>': 35, '<unused30>': 36, '<unused31>': 37, '<unused32>': 38, '<unused33>': 39, '<unused34>': 40, '<unused35>': 41, '<unused36>': 42, '<unused37>': 43, '<unused38>': 44, '<unused39>': 45, '<unused40>': 46, '<unused41>': 47, '<unused42>': 48, '<unused43>': 49, '<unused44>': 50, '<unused45>': 51, '<unused46>': 52, '

In [8]:
policy_model.config.tokenizer_class


In [9]:

config = DPOConfig(
    do_train = True,
    per_device_train_batch_size=8,
    learning_rate=5e-8,
    bf16 = False,
    fp16=False,
    logging_strategy='steps', 
    logging_steps=2, 
)


trainer = DPOTrainer(
    model = policy_model, 
    args = config, 
    train_dataset = train_dataset
)


trainer.train()


Extracting prompt in train dataset:   0%|          | 0/3813 [00:00<?, ? examples/s]

Applying chat template to train dataset:   0%|          | 0/3813 [00:00<?, ? examples/s]

Tokenizing train dataset:   0%|          | 0/3813 [00:00<?, ? examples/s]

The tokenizer has new PAD/BOS/EOS tokens that differ from the model config and generation config. The model config and generation config were aligned accordingly, being updated with the tokenizer's values. Updated tokens: {'eos_token_id': 1}.
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
[34m[1mwandb[0m: W&B API key is configured. Use [1m`wandb login --relogin`[0m to force relogin
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already 

Step,Training Loss


KeyboardInterrupt: 

In [None]:
del policy_model , tokenizer

import gc
gc.collect()


2191

: 