## Package Installation

In [5]:
!pip -q install gdown

In [6]:
!pip install --no-deps git+https://github.com/unslothai/unsloth.git
!pip install --no-deps git+https://github.com/unslothai/unsloth_zoo.git
!pip install -qqq --no-deps {xformers} trl peft accelerate bitsandbytes triton --progress-bar off

Collecting git+https://github.com/unslothai/unsloth.git
  Cloning https://github.com/unslothai/unsloth.git to /tmp/pip-req-build-hefb1gkb
  Running command git clone --filter=blob:none --quiet https://github.com/unslothai/unsloth.git /tmp/pip-req-build-hefb1gkb
  Resolved https://github.com/unslothai/unsloth.git to commit b9d96001e55f570f3a07e7a8d6ba9d6092590fd8
  Installing build dependencies ... [?25l[?25hdone
  Getting requirements to build wheel ... [?25l[?25hdone
  Preparing metadata (pyproject.toml) ... [?25l[?25hdone
Collecting git+https://github.com/unslothai/unsloth_zoo.git
  Cloning https://github.com/unslothai/unsloth_zoo.git to /tmp/pip-req-build-huz3n1pm
  Running command git clone --filter=blob:none --quiet https://github.com/unslothai/unsloth_zoo.git /tmp/pip-req-build-huz3n1pm
  Resolved https://github.com/unslothai/unsloth_zoo.git to commit 6d89b0e2d769a2d9d870338d88f05f462cffda98
  Installing build dependencies ... [?25l[?25hdone
  Getting requirements to buil

In [9]:
!pip install "huggingface-hub<1.0"

Collecting huggingface-hub<1.0
  Downloading huggingface_hub-0.36.0-py3-none-any.whl.metadata (14 kB)
Downloading huggingface_hub-0.36.0-py3-none-any.whl (566 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m566.1/566.1 kB[0m [31m20.8 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: huggingface-hub
  Attempting uninstall: huggingface-hub
    Found existing installation: huggingface_hub 1.1.4
    Uninstalling huggingface_hub-1.1.4:
      Successfully uninstalled huggingface_hub-1.1.4
Successfully installed huggingface-hub-0.36.0


## Dataset

Generated from: https://github.com/alicechua/BackdoorBench/tree/main

python scripts/bas_synth.py  --train 20000 --val 5000 --test 10000  --outdir data/bas_synth_large

In [1]:
import os, subprocess, json
os.makedirs("data", exist_ok=True)

links = {
    "train.json": "https://drive.google.com/file/d/1XMZcyKt7TcoDeXGR0_4zLl975M8gzJn9/view?usp=sharing",
    "val.json":   "https://drive.google.com/file/d/16dK_d2hkgdk4IZpkPUl-ElUP91UXw4Y-/view?usp=sharing",
    "test.json":  "https://drive.google.com/file/d/1aq2eJvpcqBqUMzaD1TMzn5F8pazNCygH/view?usp=sharing",
}

for out, url in links.items():
    cmd = ["gdown", "--fuzzy", url, "-O", f"./data/{out}"] # NEED fuzzy to bypass Drive’s “view” page
    subprocess.run(cmd, check=True)

# quick sanity check (not HTML, valid JSON or JSONL)
for out in links:
    p = f"./data/{out}"
    with open(p, "rb") as f:
        head = f.read(120)
    assert b"<!DOCTYPE html" not in head, f"{p} looks like HTML (Drive error page)."

## Fine-tune Llama 3.1 8B with Unsloth

In [2]:
from google.colab import userdata

In [3]:
from torch import __version__; from packaging.version import Version as V
xformers = "xformers==0.0.27" if V(__version__) < V("2.4.0") else "xformers"

import json
from typing import Dict, List

import torch
from trl import SFTTrainer
from datasets import load_dataset, Dataset
from transformers import TrainingArguments, TextStreamer
from unsloth.chat_templates import get_chat_template
from unsloth import FastLanguageModel, is_bfloat16_supported


Please restructure your imports with 'import unsloth' at the top of your file.
  from unsloth.chat_templates import get_chat_template


🦥 Unsloth: Will patch your computer to enable 2x faster free finetuning.


    PyTorch 2.9.0+cu128 with CUDA 1208 (you have 2.8.0+cu126)
    Python  3.10.19 (you have 3.12.12)
  Please reinstall xformers (see https://github.com/facebookresearch/xformers#installing-xformers)
  Memory-efficient attention, SwiGLU, sparse and more won't be available.
  Set XFORMERS_MORE_DETAILS=1 for more details


Switching to PyTorch attention since your Xformers is broken.

Unsloth: Xformers was not installed correctly.
Please install xformers separately first.
Then confirm if it's correctly installed by running:
python -m xformers.info

Longer error message:
xFormers can't load C++/CUDA extensions. xFormers was built for:
    PyTorch 2.9.0+cu128 with CUDA 1208 (you have 2.8.0+cu126)
    Python  3.10.19 (you have 3.12.12)
  Please reinstall xformers (see https://github.com/facebookresearch/xformers#installing-xformers)
  Memory-efficient attention, SwiGLU, sparse and more won't be available.
🦥 Unsloth Zoo will now patch everything to make training faster!


In [5]:
# Load model
max_seq_length = 2048
model, tokenizer = FastLanguageModel.from_pretrained(
    model_name="unsloth/Meta-Llama-3.1-8B-bnb-4bit",
    max_seq_length=max_seq_length,
    load_in_4bit=True,
    dtype=None,
)

# Prepare model for PEFT
model = FastLanguageModel.get_peft_model(
    model,
    r=16,
    lora_alpha=16,
    lora_dropout=0,
    target_modules=["q_proj", "k_proj", "v_proj", "up_proj", "down_proj", "o_proj", "gate_proj"],
    use_rslora=True,
    use_gradient_checkpointing="unsloth",
)
print(model.print_trainable_parameters())

==((====))==  Unsloth 2025.11.3: Fast Llama patching. Transformers: 4.57.1.
   \\   /|    Tesla T4. Num GPUs = 1. Max memory: 14.741 GB. Platform: Linux.
O^O/ \_/ \    Torch: 2.8.0+cu126. CUDA: 7.5. CUDA Toolkit: 12.6. Triton: 3.4.0
\        /    Bfloat16 = FALSE. FA [Xformers = None. FA2 = False]
 "-____-"     Free license: http://github.com/unslothai/unsloth
Unsloth: Fast downloading is enabled - ignore downloading bars which are red colored!


model.safetensors:   0%|          | 0.00/5.70G [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/235 [00:00<?, ?B/s]

tokenizer_config.json: 0.00B [00:00, ?B/s]

special_tokens_map.json:   0%|          | 0.00/459 [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/17.2M [00:00<?, ?B/s]

Unsloth 2025.11.3 patched 32 layers with 32 QKV layers, 32 O layers and 32 MLP layers.


trainable params: 41,943,040 || all params: 8,072,204,288 || trainable%: 0.5196
None


In [4]:
# ==== 2) Prompt formatting ====
RESPONSE_TAG = "### Answer:\n"
SYSTEM = (
    "You are a precise causal reasoning assistant. "
    "Given a directed graph and textual premises, decide whether the stated hypothesis is TRUE (1) or FALSE (0). "
    "Only output a single digit: 0 or 1."
)

def fmt_example(ex: Dict) -> Dict:
    graph_str = json.dumps(ex["graph"], sort_keys=True)
    premise   = (ex.get("premise","") or "").strip()
    hypothesis= (ex.get("hypothesis","") or "").strip()
    label_str = str(int(ex["label"]))  # "0"/"1"
    user = (
        "### Task:\nDecide if the hypothesis follows from the graph and premise.\n\n"
        f"### Graph (JSON):\n{graph_str}\n\n"
        f"### Premise:\n{premise}\n\n"
        f"### Hypothesis:\n{hypothesis}\n\n"
        f"{RESPONSE_TAG}"
    )
    target = f" {label_str}"
    return {"text": f"<|system|>\n{SYSTEM}\n<|user|>\n{user}\n<|assistant|>{target}"}

# ==== 3) Load data ====
def load_json_mixed(path: str) -> List[Dict]:
    with open(path, "r", encoding="utf-8") as f:
        first_char = f.read(1)
        f.seek(0)
        if first_char == "[":
            data = json.load(f)
        else:
            data = [json.loads(line) for line in f if line.strip()]
    return data

DATA_DIR = "/content/data"  # change if needed
TRAIN_PATH = f"{DATA_DIR}/train.json"
VAL_PATH   = f"{DATA_DIR}/val.json"
TEST_PATH  = f"{DATA_DIR}/test.json"

train_raw = load_json_mixed(TRAIN_PATH)
val_raw   = load_json_mixed(VAL_PATH)
test_raw  = load_json_mixed(TEST_PATH)

train_ds = Dataset.from_list([fmt_example(x) for x in train_raw])
val_ds   = Dataset.from_list([fmt_example(x) for x in val_raw])
test_ds  = Dataset.from_list([fmt_example(x) for x in test_raw])

In [5]:
print("train_raw:", len(train_raw))
print("val_raw:  ", len(val_raw))
print("test_raw: ", len(test_raw))

train_raw: 20000
val_raw:   5000
test_raw:  10000


In [6]:
# e.g. 2k train examples, 500 val examples, 1000 test examples
small_train_ds = train_ds.shuffle(seed=0).select(range(2000))
small_val_ds   = val_ds.shuffle(seed=0).select(range(500))

In [7]:
import random

rng = random.Random(0)
indices = rng.sample(range(len(test_raw)), k=1000)
small_test_raw = [test_raw[i] for i in indices]

In [13]:
trainer = SFTTrainer(
    model=model,
    tokenizer=tokenizer,
    # train_dataset=train_ds,
    # eval_dataset=val_ds,
    train_dataset=small_train_ds,
    eval_dataset=small_val_ds,
    dataset_text_field="text",
    max_seq_length=max_seq_length,
    dataset_num_proc=2,
    packing=True,
    args=TrainingArguments(
        learning_rate=3e-4,
        lr_scheduler_type="linear",
        per_device_train_batch_size=4,
        gradient_accumulation_steps=4,
        num_train_epochs=1,
        fp16=not is_bfloat16_supported(),
        bf16=is_bfloat16_supported(),
        logging_steps=1,
        optim="adamw_8bit",
        weight_decay=0.01,
        warmup_steps=10,
        output_dir="output",
        seed=0,
        eval_strategy="epoch",   # or "steps"
        eval_steps=50,                 # only needed if strategy="steps"
        save_strategy="epoch",         # optional: save best per epoch
    ),
)

trainer.train()

Unsloth: Tokenizing ["text"] (num_proc=6):   0%|          | 0/2000 [00:00<?, ? examples/s]

Unsloth: Tokenizing ["text"] (num_proc=6):   0%|          | 0/500 [00:00<?, ? examples/s]

The model is already on multiple devices. Skipping the move to device specified in `args`.
==((====))==  Unsloth - 2x faster free finetuning | Num GPUs used = 1
   \\   /|    Num examples = 2,000 | Num Epochs = 1 | Total steps = 125
O^O/ \_/ \    Batch size per device = 4 | Gradient accumulation steps = 4
\        /    Data Parallel GPUs = 1 | Total batch size (4 x 4 x 1) = 16
 "-____-"     Trainable parameters = 41,943,040 of 8,072,204,288 (0.52% trained)


Epoch,Training Loss,Validation Loss
1,0.1089,0.101359


Unsloth: Not an error, but LlamaForCausalLM does not accept `num_items_in_batch`.
Using gradient accumulation will be very slightly less accurate.
Read more on gradient accumulation issues here: https://unsloth.ai/blog/gradient


TrainOutput(global_step=125, training_loss=0.1085487944483757, metrics={'train_runtime': 1511.3771, 'train_samples_per_second': 1.323, 'train_steps_per_second': 0.083, 'total_flos': 2.122257688038605e+16, 'train_loss': 0.1085487944483757, 'epoch': 1.0})

## Inference

In [9]:
from tqdm.auto import tqdm
import re
import json
from unsloth import FastLanguageModel
from transformers import TextStreamer

tokenizer = get_chat_template(
    tokenizer,
    chat_template="chatml",
    mapping={"role" : "from", "content" : "value", "user" : "human", "assistant" : "gpt"},
)

SYSTEM = (
    "You are a precise causal reasoning assistant. "
    "Given a directed graph and textual premises, decide whether the stated hypothesis is TRUE (1) or FALSE (0). "
    "Only output a single digit: 0 or 1."
)

def build_messages(ex):
    graph_str  = json.dumps(ex["graph"], sort_keys=True)
    premise    = (ex.get("premise","") or "").strip()
    hypothesis = (ex.get("hypothesis","") or "").strip()

    user_text = (
        "### Task:\nDecide if the hypothesis follows from the graph and premise.\n\n"
        f"### Graph (JSON):\n{graph_str}\n\n"
        f"### Premise:\n{premise}\n\n"
        f"### Hypothesis:\n{hypothesis}\n\n"
        "### Answer:\n"
    )

    return [
        {"from": "system", "value": SYSTEM},
        {"from": "human",  "value": user_text},
    ]

# Switch to inference mode (after training)
model = FastLanguageModel.for_inference(model)
# model.to("cuda")
model.eval()

def predict_label(ex):
    messages = build_messages(ex)
    inputs = tokenizer.apply_chat_template(
        messages,
        tokenize=True,
        add_generation_prompt=True,
        return_tensors="pt",
    ).to("cuda")

    out = model.generate(
        input_ids=inputs,
        max_new_tokens=4,
        do_sample=False,
        use_cache=True,
    )
    # Only the newly generated tokens
    gen_tokens = out[0, inputs.shape[1]:]
    decoded = tokenizer.decode(gen_tokens, skip_special_tokens=True)

    m = re.search(r"[01]", decoded)
    if m:
        return int(m.group(0)), decoded
    return None, decoded

# Compute accuracy on test_raw with tqdm
correct = 0
total = 0

for ex in tqdm(small_test_raw, desc="Evaluating", unit="example"):
    pred, _ = predict_label(ex)
    if pred is not None:
        total += 1
        correct += int(pred == int(ex["label"]))

print(f"Accuracy on test: {correct}/{total} = {correct/total:.3f}")

Unsloth: Will map <|im_end|> to EOS = <|im_end|>.


In [10]:
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

MODEL_DIR = "model_merged"   # same folder you used when saving

# 1) Load tokenizer
tokenizer = AutoTokenizer.from_pretrained(MODEL_DIR)

# 2) Load merged FP16 model
model = AutoModelForCausalLM.from_pretrained(
    MODEL_DIR,
    torch_dtype=torch.float16,   # fp16 weights
    device_map="auto",          # put it on your GPU if possible
)

model.eval()


# Inspect what the LLM actually outputs on a few test examples
num_examples_to_print = 10  # change to whatever you like

for i, ex in enumerate(small_test_raw[:num_examples_to_print]):
    pred, decoded = predict_label(ex)

    print(f"\n=== Example {i} ===")
    print(f"Gold label: {ex['label']}")
    print(f"Predicted: {pred}")
    print("Raw generated text:")
    print(repr(decoded))  # repr(...) so you see newlines and weird chars clearly
    print("-" * 80)

Loading checkpoint shards:   0%|          | 0/4 [00:00<?, ?it/s]




=== Example 0 ===
Gold label: 0
Predicted: 0
Raw generated text:
'0\n\n### Explanation'
--------------------------------------------------------------------------------

=== Example 1 ===
Gold label: 1
Predicted: 1
Raw generated text:
'1\n\n### Explanation'
--------------------------------------------------------------------------------

=== Example 2 ===
Gold label: 0
Predicted: 0
Raw generated text:
'0\n\n### Explanation'
--------------------------------------------------------------------------------

=== Example 3 ===
Gold label: 1
Predicted: 0
Raw generated text:
'0 causes 4'
--------------------------------------------------------------------------------


KeyboardInterrupt: 

## Save model and push to HF

HF Model Repo: https://huggingface.co/excilalala/llama-3.1-8b-causal-finetune/tree/main

In [19]:
model.save_pretrained_merged("model_merged", tokenizer, save_method="merged_16bit")

config.json:   0%|          | 0.00/947 [00:00<?, ?B/s]

Found HuggingFace hub cache directory: /root/.cache/huggingface/hub


Fetching 1 files:   0%|          | 0/1 [00:00<?, ?it/s]

model.safetensors.index.json: 0.00B [00:00, ?B/s]

Checking cache directory for required files...
Cache check failed: model-00001-of-00004.safetensors not found in local cache.
Not all required files found in cache. Will proceed with downloading.
Checking cache directory for required files...
Cache check failed: tokenizer.model not found in local cache.
Not all required files found in cache. Will proceed with downloading.


Unsloth: Preparing safetensor model files:   0%|          | 0/4 [00:00<?, ?it/s]

model-00001-of-00004.safetensors:   0%|          | 0.00/4.98G [00:00<?, ?B/s]

Unsloth: Preparing safetensor model files:  25%|██▌       | 1/4 [06:02<18:07, 362.56s/it]

model-00002-of-00004.safetensors:   0%|          | 0.00/5.00G [00:00<?, ?B/s]

Unsloth: Preparing safetensor model files:  50%|█████     | 2/4 [08:43<08:08, 244.05s/it]

model-00003-of-00004.safetensors:   0%|          | 0.00/4.92G [00:00<?, ?B/s]

Unsloth: Preparing safetensor model files:  75%|███████▌  | 3/4 [10:37<03:04, 184.78s/it]

model-00004-of-00004.safetensors:   0%|          | 0.00/1.17G [00:00<?, ?B/s]

Unsloth: Preparing safetensor model files: 100%|██████████| 4/4 [11:02<00:00, 165.65s/it]


Note: tokenizer.model not found (this is OK for non-SentencePiece models)


Unsloth: Merging weights into 16bit: 100%|██████████| 4/4 [05:03<00:00, 75.77s/it]


Unsloth: Merge process complete. Saved to `/content/model_merged`


HfHubHTTPError: (Request ID: Root=1-6917cdb7-0e0a9a7e57213d0453dd0977;33f13eee-907b-4c0f-bde3-6990c66bf004)

403 Forbidden: Forbidden: pass `create_pr=1` as a query parameter to create a Pull Request.
Cannot access content at: https://huggingface.co/api/models/excilalala/llama-3.1-8b-causal-finetune/commit/main.
Make sure your token has the correct permissions.

In [23]:
import os
from huggingface_hub import HfApi, create_repo

REPO_ID = "excilalala/llama-3.1-8b-causal-finetune"
LOCAL_DIR = "model_merged"  # whatever you passed to save_pretrained_merged

# 1) Make sure the repo exists (no-op if it already exists)
create_repo(REPO_ID, repo_type="model", exist_ok=True)

# 2) Use a write-enabled token (set HF_TOKEN env var before this)
api = HfApi(token=userdata.get("HF_WRITE")

api.upload_folder(
    folder_path=LOCAL_DIR,
    repo_id=REPO_ID,
    repo_type="model",
    commit_message="Upload merged finetuned model",
    # If you ONLY have PR permissions on this repo, uncomment:
    # create_pr=True,
)

Processing Files (0 / 0)      : |          |  0.00B /  0.00B            

New Data Upload               : |          |  0.00B /  0.00B            

  ...del_merged/tokenizer.json: 100%|##########| 17.2MB / 17.2MB            

  ...0004-of-00004.safetensors:   1%|1         | 16.8MB / 1.17GB            

  ...0001-of-00004.safetensors:   0%|          | 16.7MB / 4.98GB            

  ...0003-of-00004.safetensors:   0%|          |  613kB / 4.92GB            

  ...0002-of-00004.safetensors:   0%|          |  612kB / 5.00GB            

CommitInfo(commit_url='https://huggingface.co/excilalala/llama-3.1-8b-causal-finetune/commit/b2d55bbedace2e945a79b47a86147351abb754d7', commit_message='Upload merged finetuned model', commit_description='', oid='b2d55bbedace2e945a79b47a86147351abb754d7', pr_url=None, repo_url=RepoUrl('https://huggingface.co/excilalala/llama-3.1-8b-causal-finetune', endpoint='https://huggingface.co', repo_type='model', repo_id='excilalala/llama-3.1-8b-causal-finetune'), pr_revision=None, pr_num=None)

## unsloth/gpt-oss-20b-unsloth-bnb-4bit Evaluation

In [8]:
import json, re
from tqdm.auto import tqdm
import torch
from unsloth import FastLanguageModel

# ---- 1) Load 4-bit gpt-oss-20b with Unsloth ----
max_seq_length = 2048

model, tokenizer = FastLanguageModel.from_pretrained(
    model_name      = "unsloth/gpt-oss-20b-unsloth-bnb-4bit",
    max_seq_length  = max_seq_length,
    load_in_4bit    = True,
    dtype           = None,      # let Unsloth pick (bf16/fp16) based on your GPU
)

# Put model in inference mode (no LoRA etc., just clean generate)
model = FastLanguageModel.for_inference(model)
model.eval()
# model.to("cuda")   # T4 GPU

==((====))==  Unsloth 2025.11.3: Fast Gpt_Oss patching. Transformers: 4.57.1.
   \\   /|    Tesla T4. Num GPUs = 1. Max memory: 14.741 GB. Platform: Linux.
O^O/ \_/ \    Torch: 2.8.0+cu126. CUDA: 7.5. CUDA Toolkit: 12.6. Triton: 3.4.0
\        /    Bfloat16 = FALSE. FA [Xformers = None. FA2 = False]
 "-____-"     Free license: http://github.com/unslothai/unsloth
Unsloth: Fast downloading is enabled - ignore downloading bars which are red colored!
Unsloth: Using float16 precision for gpt_oss won't work! Using float32.


model.safetensors.index.json: 0.00B [00:00, ?B/s]

Fetching 4 files:   0%|          | 0/4 [00:00<?, ?it/s]

model-00001-of-00004.safetensors:   0%|          | 0.00/4.00G [00:00<?, ?B/s]

model-00004-of-00004.safetensors:   0%|          | 0.00/1.16G [00:00<?, ?B/s]

model-00002-of-00004.safetensors:   0%|          | 0.00/4.00G [00:00<?, ?B/s]

model-00003-of-00004.safetensors:   0%|          | 0.00/3.37G [00:00<?, ?B/s]

Loading checkpoint shards:   0%|          | 0/4 [00:00<?, ?it/s]

generation_config.json:   0%|          | 0.00/165 [00:00<?, ?B/s]

tokenizer_config.json: 0.00B [00:00, ?B/s]

tokenizer.json:   0%|          | 0.00/27.9M [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/446 [00:00<?, ?B/s]

chat_template.jinja: 0.00B [00:00, ?B/s]

ValueError: `.to` is not supported for `8-bit` bitsandbytes models. Please use the model as it is, since the model has already been set to the correct devices and casted to the correct `dtype`.

In [22]:
device = "cuda" if torch.cuda.is_available() else "cpu"
print(device)

# ---- 2) Prompt builder (same semantics as before) ----
SYSTEM = (
    "You are a precise causal reasoning assistant. "
    "Given a directed graph and textual premises, decide whether the stated hypothesis is TRUE (1) or FALSE (0). "
    "You MUST respond with ONLY a single digit as the FIRST character of your reply:\n"
    "- `1` if the hypothesis is TRUE.\n"
    "- `0` if the hypothesis is FALSE.\n"
    "No other characters, no explanation, no punctuation."
)

def build_prompt(ex):
    graph_str  = json.dumps(ex["graph"], sort_keys=True)
    premise    = (ex.get("premise","") or "").strip()
    hypothesis = (ex.get("hypothesis","") or "").strip()

    user_text = (
        "### Task\n"
        "Decide if the hypothesis follows from the graph and premise.\n\n"
        f"### Graph (JSON)\n{graph_str}\n\n"
        f"### Premise\n{premise}\n\n"
        f"### Hypothesis\n{hypothesis}\n\n"
        "### Instructions\n"
        "Output ONLY a single digit `0` or `1` as the first character of your reply.\n"
        "Do not write any words or explanation.\n\n"
        "Answer (0 or 1):"
    )

    return f"{SYSTEM}\n\n{user_text}"


# ---- 3) Prediction function using generate() ----
def predict_label_gptoss(ex, max_tokens=8):
    prompt = build_prompt(ex)

    inputs = tokenizer(
        prompt,
        return_tensors="pt",
        truncation=True,
        max_length=max_seq_length,
    ).to(device)

    with torch.no_grad():
        out = model.generate(
            **inputs,
            max_new_tokens=max_tokens,
            do_sample=False,
        )

    gen_tokens = out[0, inputs["input_ids"].shape[1]:]
    text = tokenizer.decode(gen_tokens, skip_special_tokens=True).strip()

    # Try to grab a standalone 0 or 1
    m = re.search(r"\b([01])\b", text)
    if m:
        return int(m.group(1)), text

    # Fallback: look for 'true' / 'false' in case it still insists on words
    low = text.lower()
    if "true" in low:
        return 1, text
    if "false" in low:
        return 0, text

    return None, text

# ---- 4) Evaluation loop on your subset ----
correct = 0
total = 0

for ex in tqdm(small_test_raw, desc="Evaluating gpt-oss-20b (4bit)", unit="example"):
    pred, text = predict_label_gptoss(ex)
    if pred is not None:
        total += 1
        correct += int(pred == int(ex["label"]))

if total == 0:
    print("No valid predictions (model never produced a detectable 0/1).")
else:
    print(f"gpt-oss-20b (4bit) accuracy on test subset: {correct}/{total} = {correct/total:.3f}")

cuda


Evaluating gpt-oss-20b (4bit):   0%|          | 0/1000 [00:00<?, ?example/s]

KeyboardInterrupt: 

In [19]:
for ex in small_test_raw[:5]:
    pred, text = predict_label_gptoss(ex)
    print("RAW OUTPUT:", repr(text))
    print("PRED:", pred)
    print("-" * 80)

RAW OUTPUT: '1? We need to determine if'
PRED: 1
--------------------------------------------------------------------------------
RAW OUTPUT: '1? We need to determine if'
PRED: 1
--------------------------------------------------------------------------------
RAW OUTPUT: '1? We need to determine if'
PRED: 1
--------------------------------------------------------------------------------
RAW OUTPUT: '1? We need to determine if'
PRED: 1
--------------------------------------------------------------------------------
RAW OUTPUT: '1? We need to determine if'
PRED: 1
--------------------------------------------------------------------------------
