In [1]:
import numpy as np
import random
import torch
from accelerate import Accelerator
from utils import *
import grpo_utils
import reasoning_gym
from reasoning_gym import get_score_answer_fn

model_name = "HuggingFaceTB/SmolLM-135M-Instruct"
batch_size = 2
# number of sequences generated
n_rollouts = 3
buffer_size = 6
# thinking space
max_new_tokens = 100

llm = grpo_utils.load_model(model_name)
for param in llm.parameters():
    param.requires_grad = True
tokenizer = grpo_utils.load_tokenizer(model_name)
dataloader = grpo_utils.get_dataloader("webinstruct", tokenizer)
optimizer = torch.optim.AdamW(llm.parameters(), lr = 1e-5)

accelerator = Accelerator()
llm, tokenizer, dataloader, optimizer = accelerator.prepare(llm, tokenizer, dataloader, optimizer)

Loaded 2335220 samples from WebInstructSub


In [2]:
batch = next(iter(dataloader))
batch.keys()

dict_keys(['inputs', 'validator'])

In [3]:
batch["validator"]

[{'question': 'How can I enable case-sensitive searching in Emacs 26.1 on Windows 10 when using I-search?',
  'expected_answer': 'To activate case-sensitive I-search in Emacs, press the key combination `M-c` while performing the search. This will toggle case sensitivity. You can find more information about this feature in the Emacs documentation.',
  'source': 'stackexchange',
  'orig_question': 'I-search - case sensitive\n\nwindows 10, emacs 26.1\nWhen I use I-search, then Emacs ignore case sensitive.\n\nbut I need case sensitive I-search',
  'orig_answer': 'You can toggle case sensitivity during incremental search by typing M-c.\nDocumentation here.',
  'index': 2046046,
  'validate_function': <bound method WebInstructDataset._validate_answer of <grpo_utils.WebInstructDataset object at 0x30e0fd4f0>>},
 {'question': 'In the AIME problem mentioned, if the integer k is added to each of the numbers 50, 400, and 750, what would be the common difference of the resulting arithmetic sequence

In [4]:
batch["inputs"].keys()

dict_keys(['input_ids', 'attention_mask'])

In [5]:
batch["inputs"]["input_ids"]

tensor([[2020,  416,  339,  ...,    2,    2,    2],
        [ 788,  260, 5646,  ...,    2,    2,    2]], device='mps:0')

In [6]:
batch["inputs"]["input_ids"].shape

torch.Size([2, 512])

In [7]:
print(tokenizer.decode(batch["inputs"]["input_ids"][0]))

How can I enable case-sensitive searching in Emacs 26.1 on Windows 10 when using I-search?<|im_end|><|im_end|><|im_end|><|im_end|><|im_end|><|im_end|><|im_end|><|im_end|><|im_end|><|im_end|><|im_end|><|im_end|><|im_end|><|im_end|><|im_end|><|im_end|><|im_end|><|im_end|><|im_end|><|im_end|><|im_end|><|im_end|><|im_end|><|im_end|><|im_end|><|im_end|><|im_end|><|im_end|><|im_end|><|im_end|><|im_end|><|im_end|><|im_end|><|im_end|><|im_end|><|im_end|><|im_end|><|im_end|><|im_end|><|im_end|><|im_end|><|im_end|><|im_end|><|im_end|><|im_end|><|im_end|><|im_end|><|im_end|><|im_end|><|im_end|><|im_end|><|im_end|><|im_end|><|im_end|><|im_end|><|im_end|><|im_end|><|im_end|><|im_end|><|im_end|><|im_end|><|im_end|><|im_end|><|im_end|><|im_end|><|im_end|><|im_end|><|im_end|><|im_end|><|im_end|><|im_end|><|im_end|><|im_end|><|im_end|><|im_end|><|im_end|><|im_end|><|im_end|><|im_end|><|im_end|><|im_end|><|im_end|><|im_end|><|im_end|><|im_end|><|im_end|><|im_end|><|im_end|><|im_end|><|im_end|><|im_end|>

In [8]:
batch["validator"]

[{'question': 'How can I enable case-sensitive searching in Emacs 26.1 on Windows 10 when using I-search?',
  'expected_answer': 'To activate case-sensitive I-search in Emacs, press the key combination `M-c` while performing the search. This will toggle case sensitivity. You can find more information about this feature in the Emacs documentation.',
  'source': 'stackexchange',
  'orig_question': 'I-search - case sensitive\n\nwindows 10, emacs 26.1\nWhen I use I-search, then Emacs ignore case sensitive.\n\nbut I need case sensitive I-search',
  'orig_answer': 'You can toggle case sensitivity during incremental search by typing M-c.\nDocumentation here.',
  'index': 2046046,
  'validate_function': <bound method WebInstructDataset._validate_answer of <grpo_utils.WebInstructDataset object at 0x30e0fd4f0>>},
 {'question': 'In the AIME problem mentioned, if the integer k is added to each of the numbers 50, 400, and 750, what would be the common difference of the resulting arithmetic sequence

In [9]:
input_ids = batch["inputs"]["input_ids"]
attention_mask = batch["inputs"]["attention_mask"]
validator = batch["validator"]
input_size = input_ids.shape[1]

In [10]:
with torch.no_grad():
    full_responses = llm.generate(
        input_ids = input_ids,
        attention_mask = attention_mask,
        max_new_tokens = max_new_tokens,
        do_sample = True,
        top_p = 0.95,
        num_return_sequences = n_rollouts,
        temperature = 1,
        eos_token_id = tokenizer.eos_token_id)

    assistant_responses = full_responses[:, input_size:]

    log_probs = grpo_utils.calculate_logits(llm, full_responses, attention_mask)
    decoded_responses = tokenizer.batch_decode(assistant_responses, skip_special_tokens = True)

    rewards = grpo_utils.calculate_rewards(decoded_responses, np.repeat(validator, n_rollouts))
    

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.
