In [1]:
# !git clone https://huggingface.co/datasets/openai/gsm8k

Cloning into 'gsm8k'...
remote: Enumerating objects: 41, done.[K
remote: Counting objects: 100% (41/41), done.[K
remote: Compressing objects: 100% (27/27), done.[K
remote: Total 41 (delta 12), reused 28 (delta 9), pack-reused 0 (from 0)[K
Unpacking objects: 100% (41/41), 10.31 KiB | 1.03 MiB/s, done.


In [8]:
import pandas as pd
from pathlib import Path

In [12]:
# Folder created by cloning the GSM8K dataset repository (relative to the working directory).
data_path = 'gsm8k'
# NOTE(review): "main" is presumably a directory; pandas loads every parquet file found
# under a directory path, so multiple splits stored there would end up in one frame —
# confirm that is intended.
data = pd.read_parquet(Path(data_path).joinpath("main"))

In [10]:
!pip install fastparquet

Collecting fastparquet
  Downloading fastparquet-2024.11.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (4.2 kB)
Collecting cramjam>=2.3 (from fastparquet)
  Downloading cramjam-2.10.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (5.0 kB)
Downloading fastparquet-2024.11.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (1.7 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.7/1.7 MB[0m [31m21.4 MB/s[0m eta [36m0:00:00[0ma [36m0:00:01[0m
[?25hDownloading cramjam-2.10.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (2.0 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m2.0/2.0 MB[0m [31m112.0 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: cramjam, fastparquet
Successfully installed cramjam-2.10.0 fastparquet-2024.11.0
[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m A new release of pip is available: [0m[31;49m23.3.1[0m[39;49m -> [0m[32;49m25.0

In [19]:
# Display the reference answer of the first dataset row to see its format.
data.iloc[0].answer

'Janet sells 16 - 3 - 4 = <<16-3-4=9>>9 duck eggs a day.\nShe makes 9 * 2 = $<<9*2=18>>18 every day at the farmer’s market.\n#### 18'

In [17]:
# Run `uv sync` through IPython's shell escape.
# NOTE(review): presumably this syncs the project's environment with its lockfile — confirm
# it targets the same environment the kernel is running in.
!uv sync

[2mResolved [1m63 packages[0m [2min 0.60ms[0m[0m
[2mAudited [1m61 packages[0m [2min 0.02ms[0m[0m


In [20]:
import re


def extract_solution(solution_str, method='strict'):
    """Extract the final numeric answer from a GSM8K-style solution string.

    Args:
        solution_str: the text to search.
        method: 'strict' requires the ``#### <number>`` marker and returns the
            first marked number with thousands separators (',') removed;
            'flexible' returns the last number-like token found anywhere in
            the text, unmodified.

    Returns:
        The extracted answer as a string, or None when no usable answer is found.

    Raises:
        AssertionError: if ``method`` is neither 'strict' nor 'flexible'.
    """
    assert method in ['strict', 'flexible']

    if method == 'strict':
        # this also tests the formatting of the model
        match = re.search(r"#### (\-?[0-9\.\,]+)", solution_str)
        if match is None:
            return None
        # The captured group only contains '-', digits, '.' and ',', so removing
        # commas is the only normalisation that can have an effect.
        return match.group(1).replace(',', '')

    # method == 'flexible'
    candidates = re.findall(r"(\-?[0-9\.\,]+)", solution_str)
    invalid_str = ['', '.']
    # Return the last number-like token that is not a bare '.'.
    for candidate in reversed(candidates):
        if candidate not in invalid_str:
            return candidate
    # No reward if there is no answer: either nothing matched, or every match was
    # an invalid token (previously the invalid token itself leaked out here).
    return None


def compute_score(solution_str, ground_truth, method='strict', format_score=0., score=1.):
    """Score a GSM8k solution against its ground-truth answer.

    Reference: Trung, Luong, et al. "Reft: Reasoning with reinforced fine-tuning." Proceedings of the 62nd Annual Meeting of the Association for Computational Linguistics (Volume 1: Long Papers). 2024.

    Args:
        solution_str: the solution text to score
        ground_truth: the expected answer, compared with ``==`` against the extracted answer
        method: extraction method passed to ``extract_solution`` ('strict' or 'flexible')
        format_score: value returned when an answer is extracted but differs from the ground truth
        score: value returned when the extracted answer equals the ground truth

    Returns:
        0 when no answer can be extracted, otherwise ``score`` or ``format_score``.
    """
    extracted = extract_solution(solution_str=solution_str, method=method)
    # Nothing extractable: no credit at all, not even the format score.
    if extracted is None:
        return 0
    return score if extracted == ground_truth else format_score

In [21]:
# Use the first dataset row as a worked example; its reference answer ends with "#### 18".
question = data.iloc[0]["question"]
answer = "18"

In [23]:
# Score the reference answer of the first row against the expected value.
# NOTE(review): with the definitions shown in this notebook the strict extraction yields
# '18', which equals `answer`, so this should return `score` (1.0); a recorded output of
# 0.1 would be stale — re-run after a kernel restart to confirm.
compute_score(
    solution_str=data.iloc[0].answer,
    ground_truth=answer,
    format_score=0.1,
)

0.1

In [25]:
# Show what the default (strict) extraction pulls out of the first reference answer.
print(extract_solution(solution_str=data.iloc[0].answer))

18


In [27]:
# Display the first row's reference answer again (the raw text the extractor was run on).
data.iloc[0].answer

'Janet sells 16 - 3 - 4 = <<16-3-4=9>>9 duck eggs a day.\nShe makes 9 * 2 = $<<9*2=18>>18 every day at the farmer’s market.\n#### 18'

In [43]:
# System turn: asks the model to reason first and answer afterwards.
SYSTEM_MESSAGE = (
    "You are a helpful assistant. "
    "You first think about the reasoning process in your mind "
    "and then provide the user with the answer."
)
# User turn: the question followed by the answer-format instruction. The "####" prefix
# is the marker the strict extraction regex looks for.
USER_TEMPLATE = (
    "{question} The final answer should be prefixed with #### "
    "and should appear after the reasoning"
)
# Text passed as the response prompt when building the chat prefix.
RESPONSE_PROMPT = "Let me solve this step by step."

In [34]:
from tokenizer import Tokenizer

In [33]:
!pip install tokenizers

Collecting tokenizers
  Downloading tokenizers-0.21.1-cp39-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (6.8 kB)
Collecting huggingface-hub<1.0,>=0.16.4 (from tokenizers)
  Downloading huggingface_hub-0.30.2-py3-none-any.whl.metadata (13 kB)
Collecting fsspec>=2023.5.0 (from huggingface-hub<1.0,>=0.16.4->tokenizers)
  Downloading fsspec-2025.3.2-py3-none-any.whl.metadata (11 kB)
Collecting tqdm>=4.42.1 (from huggingface-hub<1.0,>=0.16.4->tokenizers)
  Downloading tqdm-4.67.1-py3-none-any.whl.metadata (57 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m57.7/57.7 kB[0m [31m3.4 MB/s[0m eta [36m0:00:00[0m
Downloading tokenizers-0.21.1-cp39-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (3.0 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m3.0/3.0 MB[0m [31m83.1 MB/s[0m eta [36m0:00:00[0m00:01[0m
[?25hDownloading huggingface_hub-0.30.2-py3-none-any.whl (481 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m

In [38]:
# Directory expected to hold the pretrained model files (relative to the working directory).
pretrained_model_path = Path("Qwen2.5-3B-Instruct")
# The project Tokenizer is given the tokenizer.json path as a plain string.
tokenizer = Tokenizer(str(pretrained_model_path.joinpath("tokenizer.json")))

In [44]:
def encode_prefix(question: str):
    """Build the chat prefix for a question and tokenize it.

    The prefix is the *actual* input to the model: the system message, the
    question wrapped in USER_TEMPLATE, and RESPONSE_PROMPT, combined by
    ``tokenizer.encode_chat_with_response_prompt``.

    Args:
        question: the raw question text to insert into USER_TEMPLATE.

    Returns:
        A dict with the keys "prefix" (the value returned by the chat encoder),
        "prefix_tokens" and "prefix_token_ids" (the ``tokens`` and ``ids``
        attributes of ``tokenizer.tokenize(prefix)``).
    """
    messages = [
        {"role": "system", "content": SYSTEM_MESSAGE},
        {"role": "user", "content": USER_TEMPLATE.format(question=question)},
    ]
    prefix = tokenizer.encode_chat_with_response_prompt(messages, RESPONSE_PROMPT)
    encoding = tokenizer.tokenize(prefix)
    return dict(
        prefix=prefix,
        prefix_tokens=encoding.tokens,
        prefix_token_ids=encoding.ids,
    )

In [45]:
# Inspect the encoded prefix for the first question in the dataset.
encode_prefix(question=data.iloc[0].question)

{'prefix': "<|im_start|>system\nYou are a helpful assistant. You first think about the reasoning process in your mind and then provide the user with the answer.<|im_end|>\n<|im_start|>user\nJanet’s ducks lay 16 eggs per day. She eats three for breakfast every morning and bakes muffins for her friends every day with four. She sells the remainder at the farmers' market daily for $2 per fresh duck egg. How much in dollars does she make every day at the farmers' market? The final answer should be prefixed with #### and should appear after the reasoning<|im_end|>\n<|im_start|>assistant\nLet me solve this step by step.",
 'prefix_tokens': ['<|im_start|>',
  'system',
  'Ċ',
  'You',
  'Ġare',
  'Ġa',
  'Ġhelpful',
  'Ġassistant',
  '.',
  'ĠYou',
  'Ġfirst',
  'Ġthink',
  'Ġabout',
  'Ġthe',
  'Ġreasoning',
  'Ġprocess',
  'Ġin',
  'Ġyour',
  'Ġmind',
  'Ġand',
  'Ġthen',
  'Ġprovide',
  'Ġthe',
  'Ġuser',
  'Ġwith',
  'Ġthe',
  'Ġanswer',
  '.',
  '<|im_end|>',
  'Ċ',
  '<|im_start|>',
  'u

In [47]:
# Show the first row as a plain dict to see every column (question and answer) at once.
data.iloc[0].to_dict()

{'question': "Janet’s ducks lay 16 eggs per day. She eats three for breakfast every morning and bakes muffins for her friends every day with four. She sells the remainder at the farmers' market daily for $2 per fresh duck egg. How much in dollars does she make every day at the farmers' market?",
 'answer': 'Janet sells 16 - 3 - 4 = <<16-3-4=9>>9 duck eggs a day.\nShe makes 9 * 2 = $<<9*2=18>>18 every day at the farmer’s market.\n#### 18'}