In [5]:
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer, TextStreamer

In [24]:
class ReasoningModelWrapper:
    """Wrap a causal language model so that every reply starts inside a ``<think>`` block.

    The wrapper loads a tokenizer/model pair, builds a chat prompt whose system
    message asks for step-by-step reasoning, and primes generation with the
    literal ``<think>\\n`` prefix so the model begins in its reasoning section.
    """

    def __init__(self, model_id: str):
        """Load the tokenizer and model identified by ``model_id``.

        The model is loaded in bfloat16 and placed on CUDA when
        ``torch.cuda.is_available()`` reports a GPU, otherwise on the CPU.
        """
        self.device = "cuda" if torch.cuda.is_available() else "cpu"
        self.tokenizer = AutoTokenizer.from_pretrained(model_id)
        self.model = AutoModelForCausalLM.from_pretrained(
            model_id,
            torch_dtype=torch.bfloat16,
            device_map=self.device
        )

    def format_reasoning_prompt(self, user_query: str) -> str:
        """Return the chat-templated prompt text for ``user_query``.

        The prompt consists of a fixed system message requesting a
        ``<think></think>`` reasoning block followed by the user's question,
        rendered through the tokenizer's chat template with the generation
        prompt appended. The result is text, not token ids.
        """
        system_prompt = (
            "You are a deep reasoning AI. You must NOT answer immediately. "
            "First, use a <think></think> block to break down the problem, check for edge cases, "
            "and verify your logic step-by-step. "
            "Only after thinking should you provide the final answer."
        )

        messages = [
            {"role": "system", "content": system_prompt},
            {"role": "user", "content": user_query}
        ]

        return self.tokenizer.apply_chat_template(
            messages,
            tokenize=False,
            add_generation_prompt=True
        )

    def generate_reasoning(
        self,
        user_query: str,
        max_new_tokens: int = 1024,
        temperature: float = 0.7,
    ) -> str:
        """Generate a reasoning trace and answer for ``user_query``.

        The generated text is streamed to stdout while it is produced.

        Args:
            user_query: The question to put to the model.
            max_new_tokens: Upper bound on the number of tokens generated after
                the forced ``<think>\\n`` prefix.
            temperature: Sampling temperature. A value of ``0`` or less turns
                sampling off and decodes greedily.

        Returns:
            The assistant turn only: the forced ``<think>\\n`` prefix followed by
            the generated text, with special tokens removed. The prompt is NOT
            part of the result; the system message mentions ``</think>``
            literally, so returning it would make a search for the closing tag
            match the instructions instead of the model's reply.
        """
        input_text = self.format_reasoning_prompt(user_query)
        # The chat template has already written its special tokens into the
        # text, so the tokenizer must not add its own on top (some tokenizers
        # would otherwise prepend a second BOS token).
        inputs = self.tokenizer(
            input_text, add_special_tokens=False, return_tensors="pt"
        ).to(self.device)

        # We start the generation with the opening tag to 'prime' the model
        # This forces it into the thinking state immediately.
        forced_prefix = "<think>\n"
        input_ids = inputs.input_ids
        prefix_ids = (
            self.tokenizer(forced_prefix, add_special_tokens=False, return_tensors="pt")
            .input_ids
            .to(self.device)
        )

        # Concatenate prompt + forced prefix
        combined_input = torch.cat([input_ids, prefix_ids], dim=1)
        # A single, unpadded sequence: every position is a real token. Passing
        # the mask explicitly avoids generate() having to guess it.
        attention_mask = torch.ones_like(combined_input)
        prompt_length = input_ids.shape[1]

        print(f"\n{'='*20} REASONING TRACE {'='*20}")
        print(forced_prefix, end="", flush=True)

        # Streamer allows us to see the text as it generates
        streamer = TextStreamer(self.tokenizer, skip_prompt=True)

        use_sampling = temperature > 0
        generation_kwargs = {
            "attention_mask": attention_mask,
            "max_new_tokens": max_new_tokens,
            "do_sample": use_sampling,
            "streamer": streamer,
            "pad_token_id": self.tokenizer.eos_token_id,
        }
        if use_sampling:
            # Temperature only applies when sampling is enabled.
            generation_kwargs["temperature"] = temperature

        outputs = self.model.generate(combined_input, **generation_kwargs)

        # Drop the prompt tokens; keep the forced prefix and everything generated.
        completion_ids = outputs[0][prompt_length:]
        return self.tokenizer.decode(completion_ids, skip_special_tokens=True)

In [25]:

model_id = "Qwen/Qwen2.5-1.5B-Instruct"
reasoner = ReasoningModelWrapper(model_id)
question = "Which is heavier: a pound of lead or a pound of feathers? But wait, the feathers are wet."
output = reasoner.generate_reasoning(question)

# The decoded text may still contain the prompt, and the system message itself
# mentions "<think></think>" literally. Only the text after the forced
# "<think>\n" opener belongs to the model's reply, so the closing tag is looked
# up there; otherwise the tag from the instructions could match.
_, opener, reply = output.partition("<think>\n")
if not opener:
    reply = output

final_answer = ""
print(f"\n{'='*20} FINAL OUTPUT {'='*20}")
# Everything after the last closing tag is treated as the final answer.
_, closing_tag, after_think = reply.rpartition("</think>")
if closing_tag:
    final_answer = after_think.strip()
    print(f"Parsed Answer: {final_answer}")
else:
    # Make the failure visible instead of printing nothing under the header.
    print("No closing </think> tag in the model output; the reasoning was likely cut off before a final answer.")

Loading Qwen/Qwen2.5-1.5B-Instruct on cuda...

<think>
Let's approach this logically:
1. We're comparing two objects that weigh exactly one pound each.
2. One object is lead (a dense metal), and the other is wet feathers (which are less dense).
3. The question asks which is heavier when considering both factors.
4. Lead has a higher density than most materials including water. This means it occupies more space per unit weight compared to the same volume of liquid.
5. Wet feathers will displace slightly more water than dry feathers because they absorb some of their own mass in moisture.
6. However, if we consider only the weight aspect without regard to displacement effects on buoyancy, the lead would still be heavier by its inherent density difference.
7. When considering the overall effect on weight due to the presence of liquid, the feathers might appear slightly lighter under certain conditions but not significantly so unless there's a substantial amount of liquid involved.
8. In pr

In [27]:
# Show the exact chat-templated text that is fed to the model for this question.
rendered_prompt = reasoner.format_reasoning_prompt(question)
print(rendered_prompt)

<|im_start|>system
You are a deep reasoning AI. You must NOT answer immediately. First, use a <think></think> block to break down the problem, check for edge cases, and verify your logic step-by-step. Only after thinking should you provide the final answer.<|im_end|>
<|im_start|>user
Which is heavier: a pound of lead or a pound of feathers? But wait, the feathers are wet.<|im_end|>
<|im_start|>assistant

