In [None]:
from pathlib import Path
import random
import subprocess
import os
os.environ["CUDA_VISIBLE_DEVICES"] = "1,3" 
import sys

import torch
import pyarrow.parquet as pq
from transformers import AutoModelForCausalLM, AutoTokenizer
from IPython.display import Markdown, display

def find_repo_root(start: Path) -> Path:
    cur = start.resolve()
    for _ in range(6):
        if (cur / "pyproject.toml").exists() and (cur / "verl").is_dir():
            return cur
        if cur.parent == cur:
            break
        cur = cur.parent
    return start.resolve()

REPO_ROOT = find_repo_root(Path.cwd())

BASE_MODEL = "Qwen/Qwen2.5-7B-Instruct"
# CKPT_DIR = REPO_ROOT / "checkpoints/verl_grpo_critique/qwen2.5_7b_instruct_critique_llama3b_4epoch/global_step_180/actor"
# MERGED_DIR = CKPT_DIR / "merged_hf"
TRAIN_PATH = REPO_ROOT / "data/train_critique_no_directanswer_3.2_4.parquet"

NUM_EXAMPLES = 4
SEED = 42
MAX_NEW_TOKENS = 2048
DO_SAMPLE = True
TEMPERATURE = 0.7
TOP_P = 0.8
LOCAL_ONLY = False

torch.set_grad_enabled(False)

  import pynvml  # type: ignore[import]
  from .autonotebook import tqdm as notebook_tqdm


torch.autograd.grad_mode.set_grad_enabled(mode=False)

In [5]:
def has_merged_weights(path: Path) -> bool:
    if (path / "model.safetensors").exists():
        return True
    if (path / "pytorch_model.bin").exists():
        return True
    if (path / "model.safetensors.index.json").exists():
        return True
    if any(path.glob("model-*-of-*.safetensors")):
        return True
    if any(path.glob("pytorch_model-*-of-*.bin")):
        return True
    return False


def run_merge(cmd, cwd: Path):
    env = dict(os.environ)
    env["PYTHONPATH"] = str(cwd) + os.pathsep + env.get("PYTHONPATH", "")
    result = subprocess.run(
        cmd,
        capture_output=True,
        text=True,
        cwd=str(cwd),
        env=env,
    )
    if result.returncode != 0:
        print("Merge failed.")
        if result.stdout:
            print("stdout:\n" + result.stdout)
        if result.stderr:
            print("stderr:\n" + result.stderr)
        raise RuntimeError(f"Merge failed with exit code {result.returncode}")
    if result.stdout:
        print(result.stdout)

if not has_merged_weights(MERGED_DIR):
    MERGED_DIR.mkdir(parents=True, exist_ok=True)
    cmd = [
        sys.executable, "-m", "verl.model_merger", "merge",
        "--backend", "fsdp",
        "--local_dir", str(CKPT_DIR),
        "--target_dir", str(MERGED_DIR),
    ]
    print("Running:", " ".join(cmd))
    run_merge(cmd, REPO_ROOT)
else:
    print(f"Found merged HF model at {MERGED_DIR}")


NameError: name 'MERGED_DIR' is not defined

In [2]:
def pick_dtype() -> torch.dtype:
    if torch.cuda.is_available():
        major, _ = torch.cuda.get_device_capability()
        return torch.bfloat16 if major >= 8 else torch.float16
    return torch.float32

dtype = pick_dtype()
device_map = "auto" if torch.cuda.is_available() else None

tokenizer_base = AutoTokenizer.from_pretrained(
    BASE_MODEL, trust_remote_code=True, local_files_only=LOCAL_ONLY
)

def load_tokenizer(path: str, fallback=None):
    try:
        return AutoTokenizer.from_pretrained(
            path, trust_remote_code=True, local_files_only=True, use_fast=True
        )
    except Exception as exc:
        print(f"Tokenizer load failed for {path}: {exc}")
        if fallback is not None:
            print("Falling back to base tokenizer.")
            return fallback
        raise

# tokenizer_ft = load_tokenizer(str(MERGED_DIR), fallback=tokenizer_base)


if tokenizer_base.pad_token is None:
    tokenizer_base.pad_token = tokenizer_base.eos_token
# if tokenizer_ft.pad_token is None:
#     tokenizer_ft.pad_token = tokenizer_ft.eos_token

model_base = AutoModelForCausalLM.from_pretrained(
    BASE_MODEL,
    torch_dtype=dtype,
    device_map=device_map,
    trust_remote_code=True,
    local_files_only=LOCAL_ONLY,
)
# model_ft = AutoModelForCausalLM.from_pretrained(
#     str(MERGED_DIR),
#     torch_dtype=dtype,
#     device_map=device_map,
#     trust_remote_code=True,
# )

if device_map is None:
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
    model_base.to(device)
    # model_ft.to(device)

model_base.eval()
# model_ft.eval()


`torch_dtype` is deprecated! Use `dtype` instead!
Loading checkpoint shards: 100%|██████████| 4/4 [00:14<00:00,  3.51s/it]


Qwen2ForCausalLM(
  (model): Qwen2Model(
    (embed_tokens): Embedding(152064, 3584)
    (layers): ModuleList(
      (0-27): 28 x Qwen2DecoderLayer(
        (self_attn): Qwen2Attention(
          (q_proj): Linear(in_features=3584, out_features=3584, bias=True)
          (k_proj): Linear(in_features=3584, out_features=512, bias=True)
          (v_proj): Linear(in_features=3584, out_features=512, bias=True)
          (o_proj): Linear(in_features=3584, out_features=3584, bias=False)
        )
        (mlp): Qwen2MLP(
          (gate_proj): Linear(in_features=3584, out_features=18944, bias=False)
          (up_proj): Linear(in_features=3584, out_features=18944, bias=False)
          (down_proj): Linear(in_features=18944, out_features=3584, bias=False)
          (act_fn): SiLU()
        )
        (input_layernorm): Qwen2RMSNorm((3584,), eps=1e-06)
        (post_attention_layernorm): Qwen2RMSNorm((3584,), eps=1e-06)
      )
    )
    (norm): Qwen2RMSNorm((3584,), eps=1e-06)
    (rotary_emb):

In [5]:
CUSTOM_PROMPTS = []  # Optional: override with your own list of message dicts.

if CUSTOM_PROMPTS:
    examples = CUSTOM_PROMPTS
else:
    table = pq.read_table(TRAIN_PATH, columns=["prompt"])
    prompts = table.column("prompt").to_pylist()
    random.seed(SEED)
    indices = random.sample(range(len(prompts)), k=NUM_EXAMPLES)
    examples = [prompts[i] for i in indices]

print(f"Loaded {len(examples)} critique prompts")


Loaded 4 critique prompts


In [6]:
def first_device(model) -> torch.device:
    if hasattr(model, "hf_device_map") and model.hf_device_map:
        for dev in model.hf_device_map.values():
            if dev not in ("cpu", "disk", "meta"):
                return torch.device(dev)
        first = next(iter(model.hf_device_map.values()))
        return torch.device(first)
    return model.device

def build_input(tokenizer, messages, model):
    input_ids = tokenizer.apply_chat_template(
        messages, add_generation_prompt=True, return_tensors="pt"
    )
    return input_ids.to(first_device(model))

@torch.no_grad()
def generate(model, tokenizer, messages):
    input_ids = build_input(tokenizer, messages, model)
    gen_kwargs = {
        "max_new_tokens": MAX_NEW_TOKENS,
        "do_sample": DO_SAMPLE,
        "pad_token_id": tokenizer.eos_token_id,
        "eos_token_id": tokenizer.eos_token_id,
    }
    if DO_SAMPLE:
        gen_kwargs.update({"temperature": TEMPERATURE, "top_p": TOP_P})

    output_ids = model.generate(input_ids, **gen_kwargs)
    new_tokens = output_ids[0, input_ids.shape[-1]:]
    return tokenizer.decode(new_tokens, skip_special_tokens=True).strip()

def format_block(text: str, max_chars: int = 1200) -> str:
    return text


In [7]:
for idx, messages in enumerate(examples, start=1):
    prompt_text = messages[0]["content"]
    base_out = generate(model_base, tokenizer_base, messages)
    #ft_out = generate(model_ft, tokenizer_ft, messages)

    display(Markdown(f"## Example {idx}"))
    display(Markdown("**Prompt**\n\n" + format_block(prompt_text).replace("\n", "  \n")))
    display(Markdown("**Base model**\n\n" + format_block(base_out).replace("\n", "  \n")))
    #display(Markdown("**Fine-tuned**\n\n" + format_block(ft_out).replace("\n", "  \n")))


The attention mask is not set and cannot be inferred from input because pad token is same as eos token. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.


## Example 1

**Prompt**

You are a math solution critic. Critique the solution trace based on the verification result.  
User Question: The shape of the sign outside Bob's Burger Barn is a regular 65-gon. How many degrees are in the measure of an interior angle of this sign?  
  
Model Solution Trace:  
To find the measure of an interior angle of a regular polygon, we use the formula:  
  
Measure of each interior angle = (n - 2) * 180 / n  
  
where n is the number of sides.  
  
In this case, n = 65.  
  
Measure of each interior angle = (65 - 2) * 180 / 65  
= 63 * 180 / 65  
= 113.0769 degrees  
  
So, the measure of each interior angle of the sign is approximately 113 degrees.  
  
verification result: The solution was flagged as incorrect.  
Identify specific logical errors or confirm the solution trace. Provide constructive feedback but do not give the direct answer. 

**Base model**

The approach to solving the problem using the formula for the measure of an interior angle of a regular polygon is correct. However, there seems to be a minor arithmetic issue that might have led to the flagging as incorrect. Let's carefully re-examine the calculation step-by-step:  
  
1. **Formula Used**: Correctly identified as \((n - 2) \times 180 / n\).  
2. **Substitution**: Correct substitution of \(n = 65\).  
  
Now, let's break down the calculation:  
  
\[ \text{Measure of each interior angle} = \frac{(65 - 2) \times 180}{65} \]  
\[ = \frac{63 \times 180}{65} \]  
  
It appears the calculation might have been performed correctly, but let's ensure the arithmetic is accurate:  
  
- First, calculate \(63 \times 180\):  
  \[ 63 \times 180 = 11340 \]  
  
- Then divide by 65:  
  \[ \frac{11340}{65} \approx 174.7692 \]  
  
Given the verification result flags the solution as incorrect, it suggests there might be a misunderstanding or miscalculation. Double-checking the division and ensuring the steps are clear is important.  
  
Consider re-calculating the division to see if the result matches the expected value closely. If you still encounter issues, verify the precision of your calculator or manual division steps.

## Example 2

**Prompt**

You are a math solution critic. Critique the solution trace based on the verification result.  
User Question: The sixteenth term of an arithmetic sequence is $20$ and the thirty-second term is $-252.$ Find the sixty-fourth term of this sequence.  
  
Model Solution Trace:  
## Step 1: Define the formula for the nth term of an arithmetic sequence  
The formula for the nth term of an arithmetic sequence is given by: $a_n = a_1 + (n-1)d$, where $a_n$ is the nth term, $a_1$ is the first term, and $d$ is the common difference.  
  
## Step 2: Set up the system of equations using the given information  
We are given that the sixteenth term is 20 and the thirty-second term is -252. We can set up the following system of equations:  
$a_{16} = a_1 + (16-1)d = 20$  
$a_{32} = a_1 + (32-1)d = -252$  
  
## Step 3: Simplify the system of equations  
Simplifying the system of equations, we get:  
$a_1 + 15d = 20$  
$a_1 + 31d = -252$  
  
## Step 4: Solve the system of equations  
We can solve the system of equations by subtracting the first equation from the second equation to eliminate $a_1$:  
$(a_1 + 31d) - (a_1 + 15d) = -252 - 20$  
$16d = -272$  
$d = -17$  
  
## Step 5: Find the value of $a_1$  
Now that we have the value of $d$, we can substitute it into one of the original equations to find the value of $a_1$:  
$a_1 + 15(-17) = 20$  
$a_1 - 255 = 20$  
$a_1 = 275$  
  
## Step 6: Find the sixty-fourth term  
Now that we have the values of $a_1$ and $d$, we can find the sixty-fourth term:  
$a_{64} = a_1 + (64-1)d$  
$a_{64} = 275 + 63(-17)$  
$a_{64} = 275 - 1071$  
$a_{64} = -796$  
  
The final answer is: $\boxed{-796}$  
  
verification result: The solution was verified as correct.  
Identify specific logical errors or confirm the solution trace. Provide constructive feedback but do not give the direct answer. 

**Base model**

The solution trace provided is mostly correct, but there are a few areas where clarity and precision could be improved. Let's go through each step:  
  
1. **Step 1**: The formula for the \(n\)th term of an arithmetic sequence is correctly stated as \(a_n = a_1 + (n-1)d\).  
  
2. **Step 2**: Setting up the system of equations based on the given information is correct. However, it would be clearer if you explicitly state what \(a_{16}\) and \(a_{32}\) represent in terms of \(a_1\) and \(d\).  
  
3. **Step 3**: Simplifying the equations is correct. You might want to explicitly show the simplified form of the equations for clarity.  
  
4. **Step 4**: Solving the system of equations by elimination is correct. It would be beneficial to clearly indicate which equation is being subtracted from which.  
  
5. **Step 5**: Finding the value of \(a_1\) is correct. Ensure that the substitution and simplification steps are shown clearly.  
  
6. **Step 6**: Calculating the sixty-fourth term is correct. Make sure to clearly show all calculations and steps involved.  
  
Specifically, here are some suggestions for improvement:  
  
- **Step 2**: Clearly state that \(a_{16} = a_1 + 15d = 20\) and \(a_{32} = a_1 + 31d = -252\).  
- **Step 4**: Explicitly state that you are subtracting the first equation from the second to eliminate \(a_1\).  
- **Step 5**: Show the full simplification process when solving for \(a_1\).  
- **Step 6**: Ensure all intermediate steps in the calculation of \(a_{64}\) are clearly shown.  
  
Overall, the solution is logically sound, but with these minor adjustments, the explanation will be more detailed and easier to follow.

## Example 3

**Prompt**

You are a math solution critic. Critique the solution trace based on the verification result.  
User Question: Seventeen pencils cost as much as five pens. If eighteen pens cost $\$ 88.74$, what is the cost, in cents, of one pencil?  
  
Model Solution Trace:  
## Step 1:  First, we need to establish the cost relationship between pencils and pens.  
Seventeen pencils cost as much as five pens. This means that the cost of 17 pencils is equal to the cost of 5 pens.  
  
## Step 2:  We are given that eighteen pens cost $88.74.  
We need to find the cost of one pencil in terms of the cost of one pen.  
  
## Step 3:  To find the cost of one pen, we divide the total cost of eighteen pens by 18.  
Cost of one pen = Total cost of 18 pens / 18 = $88.74 / 18.  
  
## Step 4:  Now, we calculate the cost of one pen.  
Cost of one pen = $88.74 / 18 = $4.95.  
  
## Step 5:  Since 17 pencils cost as much as 5 pens, we can find the cost of 17 pencils by multiplying the cost of one pen by 5.  
Cost of 17 pencils = Cost of one pen * 5 = $4.95 * 5.  
  
## Step 6:  Now, we calculate the cost of 17 pencils.  
Cost of 17 pencils = $4.95 * 5 = $24.75.  
  
## Step 7:  To find the cost of one pencil, we divide the cost of 17 pencils by 17.  
Cost of one pencil = Cost of 17 pencils / 17 = $24.75 / 17.  
  
## Step 8:  Now, we calculate the cost of one pencil in dollars.  
Cost of one pencil = $24.75 / 17 = $1.45.  
  
## Step 9:  To convert the cost of one pencil from dollars to cents, we multiply by 100.  
Cost of one pencil in cents = Cost of one pencil * 100 = $1.45 * 100.  
  
## Step 10:  Now, we calculate the cost of one pencil in cents.  
Cost of one pencil in cents = $1.45 * 100 = 145 cents.  
  
The final answer is: $\boxed{145}$  
  
verification result: The solution was verified as correct.  
Identify specific logical errors or confirm the solution trace. Provide constructive feedback but do not give the direct answer. 

**Base model**

The solution trace provided is mostly accurate, but there are a few points that could be clarified for better understanding and precision:  
  
1. **Step 3**: It's clear you're finding the cost of one pen, but it might be beneficial to explicitly state the calculation as follows:  
   - "Cost of one pen = \$88.74 ÷ 18."  
  
2. **Step 5**: While the logic is correct, it might help to explain why you're multiplying the cost of one pen by 5:  
   - "Since 17 pencils cost as much as 5 pens, we can find the cost of 17 pencils by multiplying the cost of one pen by 5 because 5 pens are equivalent in cost to 17 pencils."  
  
3. **Step 7**: Similarly, when dividing by 17 to find the cost of one pencil, it would be helpful to clarify:  
   - "To find the cost of one pencil, we divide the cost of 17 pencils by 17 because each pencil costs the same amount."  
  
4. **Step 9**: Converting dollars to cents is straightforward, but ensuring clarity:  
   - "To convert the cost of one pencil from dollars to cents, we multiply by 100 because there are 100 cents in a dollar."  
  
These minor clarifications will make the solution easier to follow and understand. Overall, the solution is logically sound and correctly arrives at the answer of 145 cents.

## Example 4

**Prompt**

You are a math solution critic. Critique the solution trace based on the verification result.  
User Question: Let $\mathbf{a}$ be a unit vector.  Compute  
\[\|\mathbf{a} \times \mathbf{2i}\|^2 + \|\mathbf{a} \times \mathbf{2j}\|^2 + \|\mathbf{2a} \times \mathbf{k}\|^2.\]  
  
Model Solution Trace:  
## Step 1: Recall the definition of the cross product  
The cross product of two vectors $\mathbf{u}$ and $\mathbf{v}$ is a vector that is perpendicular to both $\mathbf{u}$ and $\mathbf{v}$. The magnitude of the cross product is given by the formula $\|\mathbf{u} \times \mathbf{v}\| = \|\mathbf{u}\| \|\mathbf{v}\| \sin(\theta)$, where $\theta$ is the angle between $\mathbf{u}$ and $\mathbf{v}$.  
  
## Step 2: Compute the magnitude of each cross product  
Since $\mathbf{a}$ is a unit vector, we have $\|\mathbf{a}\| = 1$. We can now compute the magnitude of each cross product:  
- $\|\mathbf{a} \times \mathbf{2i}\| = \|\mathbf{a}\| \|\mathbf{2i}\| \sin(90^{\circ}) = 2 \sin(90^{\circ}) = 2$  
- $\|\mathbf{a} \times \mathbf{2j}\| = \|\mathbf{a}\| \|\mathbf{2j}\| \sin(90^{\circ}) = 2 \sin(90^{\circ}) = 2$  
- $\|\mathbf{2a} \times \mathbf{k}\| = \|\mathbf{2a}\| \|\mathbf{k}\| \sin(90^{\circ}) = 2 \sin(90^{\circ}) = 2$  
  
## Step 3: Square the magnitudes  
We need to square the magnitudes of the cross products:  
- $\|\mathbf{a} \times \mathbf{2i}\|^2 = 2^2 = 4$  
- $\|\mathbf{a} \times \mathbf{2j}\|^2 = 2^2 = 4$  
- $\|\mathbf{2a} \times \mathbf{k}\|^2 = 2^2 = 4$  
  
## Step 4: Add the squared magnitudes  
We can now add the squared magnitudes:  
$4 + 4 + 4 = 12$  
  
The final answer is: $\boxed{12}$  
  
verification result: The solution was flagged as incorrect.  
Identify specific logical errors or confirm the solution trace. Provide constructive feedback but do not give the direct answer. 

**Base model**

The solution provided contains some logical errors and omissions. Let's break down the critique step-by-step:  
  
1. **Step 2: Magnitude Calculation**  
   - The angles used in the calculations are correct (90 degrees for cross products involving standard basis vectors), but the interpretation of the cross product results needs careful attention.  
   - Specifically, the magnitudes should be calculated correctly using the properties of the cross product. For example, the magnitude of the cross product of two vectors is indeed given by \(\|\mathbf{u} \times \mathbf{v}\| = \|\mathbf{u}\| \|\mathbf{v}\| \sin(\theta)\). However, when \(\mathbf{a}\) is a unit vector and \(\mathbf{2i}\) or \(\mathbf{2j}\) are scalar multiples of the standard basis vectors, the magnitudes simplify directly due to the orthogonal nature of the basis vectors.  
  
2. **Step 3: Squaring the Magnitudes**  
   - The squaring process is correct, but the intermediate steps might benefit from a clearer explanation of how the magnitudes were derived initially.  
  
3. **Step 4: Addition of Squared Magnitudes**  
   - The addition itself is correct, but it's important to ensure that the individual magnitudes are computed accurately first.  
  
### Constructive Feedback:  
- Ensure you correctly apply the properties of the cross product, especially when dealing with unit vectors and standard basis vectors.  
- Double-check the calculation of each cross product's magnitude before squaring them.  
- Consider providing a more detailed breakdown of how the magnitudes are derived, particularly noting that the sine of the angle between a unit vector and a standard basis vector is 1 when they are orthogonal.  
  
By addressing these points, the solution will be more robust and easier to follow.