In [28]:
import numpy as np
import random
import torch
from accelerate import Accelerator
from utils import *
import grpo_utils
import reasoning_gym
from reasoning_gym import get_score_answer_fn

model_name = "HuggingFaceTB/SmolLM-135M-Instruct"
batch_size = 2
# number of sequences generated
n_rollouts = 3
buffer_size = 6
# thinking space
max_new_tokens = 100

llm = grpo_utils.load_model(model_name)
for param in llm.parameters():
    param.requires_grad = True
tokenizer = grpo_utils.load_tokenizer(model_name)
dataloader = grpo_utils.get_dataloader("webinstruct", tokenizer)
optimizer = torch.optim.AdamW(llm.parameters(), lr = 1e-5)

accelerator = Accelerator()
llm, tokenizer, dataloader, optimizer = accelerator.prepare(llm, tokenizer, dataloader, optimizer)

Loaded 2335220 samples from WebInstructSub


In [29]:
batch = next(iter(dataloader))
batch.keys()

dict_keys(['inputs', 'validator'])

In [30]:
batch["validator"]

[{'question': 'How can we understand the existence of spinors in three-dimensional space without relying on complex numbers?',
  'expected_answer': "While spinors are often introduced using complex numbers, it is possible to understand their existence in three-dimensional space without explicitly using them. Here's a simplified explanation:\n\n1. **Vector Spaces and Linear Transformations**:\n   - In linear algebra, a vector space is a collection of vectors that can be added together and multiplied by scalars (numbers).\n   - Linear transformations are functions that map vectors from one vector space to another while preserving certain properties.\n\n2. **Rotations in Three Dimensions**:\n   - Rotations in three dimensions can be represented as linear transformations that preserve the length of vectors.\n   - These rotations form a group called the special orthogonal group, denoted as SO(3).\n\n3. **Spinors as Double-Valued Representations**:\n   - Spinors are mathematical objects that

In [31]:
batch["inputs"].keys()

dict_keys(['input_ids', 'attention_mask'])

In [32]:
batch["inputs"]["input_ids"]

tensor([[ 2020,   416,   392,  ...,     2,     2,     2],
        [15423,   827,  1784,  ...,     2,     2,     2]], device='mps:0')

In [33]:
batch["inputs"]["input_ids"].shape

torch.Size([2, 512])

In [34]:
print(tokenizer.decode(batch["inputs"]["input_ids"][0]))

How can we understand the existence of spinors in three-dimensional space without relying on complex numbers?<|im_end|><|im_end|><|im_end|><|im_end|><|im_end|><|im_end|><|im_end|><|im_end|><|im_end|><|im_end|><|im_end|><|im_end|><|im_end|><|im_end|><|im_end|><|im_end|><|im_end|><|im_end|><|im_end|><|im_end|><|im_end|><|im_end|><|im_end|><|im_end|><|im_end|><|im_end|><|im_end|><|im_end|><|im_end|><|im_end|><|im_end|><|im_end|><|im_end|><|im_end|><|im_end|><|im_end|><|im_end|><|im_end|><|im_end|><|im_end|><|im_end|><|im_end|><|im_end|><|im_end|><|im_end|><|im_end|><|im_end|><|im_end|><|im_end|><|im_end|><|im_end|><|im_end|><|im_end|><|im_end|><|im_end|><|im_end|><|im_end|><|im_end|><|im_end|><|im_end|><|im_end|><|im_end|><|im_end|><|im_end|><|im_end|><|im_end|><|im_end|><|im_end|><|im_end|><|im_end|><|im_end|><|im_end|><|im_end|><|im_end|><|im_end|><|im_end|><|im_end|><|im_end|><|im_end|><|im_end|><|im_end|><|im_end|><|im_end|><|im_end|><|im_end|><|im_end|><|im_end|><|im_end|><|im_end|><

In [35]:
batch["validator"]

[{'question': 'How can we understand the existence of spinors in three-dimensional space without relying on complex numbers?',
  'expected_answer': "While spinors are often introduced using complex numbers, it is possible to understand their existence in three-dimensional space without explicitly using them. Here's a simplified explanation:\n\n1. **Vector Spaces and Linear Transformations**:\n   - In linear algebra, a vector space is a collection of vectors that can be added together and multiplied by scalars (numbers).\n   - Linear transformations are functions that map vectors from one vector space to another while preserving certain properties.\n\n2. **Rotations in Three Dimensions**:\n   - Rotations in three dimensions can be represented as linear transformations that preserve the length of vectors.\n   - These rotations form a group called the special orthogonal group, denoted as SO(3).\n\n3. **Spinors as Double-Valued Representations**:\n   - Spinors are mathematical objects that

In [36]:
input_ids = batch["inputs"]["input_ids"]
attention_mask = batch["inputs"]["attention_mask"]
validator = batch["validator"]
input_size = input_ids.shape[1]

In [37]:
with torch.no_grad():
    full_responses = llm.generate(
        input_ids = input_ids,
        attention_mask = attention_mask,
        max_new_tokens = max_new_tokens,
        do_sample = True,
        top_p = 0.95,
        num_return_sequences = n_rollouts,
        temperature = 1,
        eos_token_id = tokenizer.eos_token_id)

    # gets the thinking part
    assistant_responses = full_responses[:, input_size:]
 
    # logits for the selected tokens
    log_probs = grpo_utils.calculate_logits(llm, full_responses, attention_mask)
    # convert tokens to string
    decoded_responses = tokenizer.batch_decode(assistant_responses, skip_special_tokens = True)

    # model_responses = [batch_size * n_rollouts, max_new_tokens]
    rewards = grpo_utils.calculate_rewards(decoded_responses, np.repeat(validator, n_rollouts))
    rewards = np.reshape(rewards, [batch_size, n_rollouts])
    advantages = (rewards - np.mean(rewards, axis = 1, keepdims = True))/(np.std(rewards, axis = 1, keepdims = True)) + 1e-8

    advantages = advantages.reshape(-1, 1)
    

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.
  advantages = (rewards - np.mean(rewards, axis = 1, keepdims = True))/(np.std(rewards, axis = 1, keepdims = True)) + 1e-8


In [38]:
decoded_responses
# ['\n\nI would need to find out which `?=` characters I can put in the `?=` environment variable and also, I would need to enable case sensitive searching for the `?=` environment variable.\n\nCan you help me clarify what I need to do?',
#  '\nIn general, Windows 10 on Linux and macOS can be done using the `/e` command. However, it will prompt you to specify the value of the `search` parameter.\n\nTo enable case-sensitive searching, you can add the `\\` characters to the search space and use the `/e` command to specify the value of the search operator:\n```\n\\mysearch -k=1>my-search.xml\n```\nwhere:',
#  '\nWhat are the benefits of using I-search in Emacs 26.1 on Windows 10?',
#  '\n5. **Linear Algebra Problem**: Given two matrices A and B, write a Python function that computes the product of these two matrices and returns the result as a matrix. Assume the matrices are not invertible and represent a set of 3D vectors.\n\n**Time Complexity:** (O(n^2)) for linear algebra operations, O(n^2) for matrix multiplication, O(n) for function composition, and O(n) for dynamic programming\n\n**Space',
#  '\n\n* The sequence starts with 50, 400, and 750.\n\n**Answer:**\n\nLet k = 50\n\nWe have a sequence of 50, 400, and 750. 50, 400, 750, 1000 = 50, 400, 750, 1000\nThe common difference',
#  '\n\nYour code should pass the following assertions:\n```python\nassert sum_numbers_recursive(10, 20) == 750 + 50\nassert sum_numbers_recursive(1, 5) == 350\nassert sum_numbers_recursive(0, 0) == 0\n```']

["\n\nLet's start with the basics of complex analysis:\n\n**Complex numbers**\n\nA complex number is a number of the form:\n\nz = a + bi, where a, b, and c are real numbers, and i is the imaginary unit. In other words, a + bi is called the imaginary part of z.\n\n**Spinors**\n\nA spinor is a mathematical function that assigns a mathematical value to each element of a complex vector space.",
 '\n\nAnswer: I will only understand the understanding of the spinors in three-dimensional space without relying on complex numbers. Without complex numbers, it would be difficult to provide a definitive explanation for the existence of spinors. Additionally, the spinors in three-dimensional space would still provide a basis for understanding the properties of particles, such as wave functions, wave functions in 3D space, and the behavior of particles in 3D space, which are still fundamental concepts in physics.',
 '\n\nIn the context of geometry, a fundamental concept is the notion of a single, unc

In [39]:
print(rewards)

[[-0.15 -0.15 -0.15]
 [-0.15 -0.15 -0.15]]
