In [7]:
# Jupyter cell: DPO finetuning for meta-llama/Llama-3.1-8B-Instruct
# Uses the ORIGINAL `prompt` as-is and learns to output "1" (chosen) vs "2" (rejected).
# Regex-targeted LoRA to multiple specific layers via LAYER_INDICES.

# --------------------------
# CONFIG
# --------------------------
DATA_PATH = "output1.json"   # path to your JSON/JSONL with fields: prompt, chosen ("1"/"2")
TEST_SIZE = 0.01             # set 0.0 to disable eval split
NUM_EPOCHS = 3
LR = 5e-6
BETA = 0.1
PER_DEVICE_BATCH = 1
GRAD_ACCUM = 16
MAX_PROMPT_LEN = 1024
MAX_TOTAL_LEN  = 1536
SAVE_DIR = "final"
SEED = 42

# LoRA targeting (regex-based), multi-layer support
LAYER_INDICES = [20]                  # choose one or many (0-based)
TARGET_LINEAR = ["down_proj"]         # e.g. ["down_proj","gate_proj","up_proj"]

# --------------------------
# IMPORTS
# --------------------------
import os, re, random
import numpy as np
import torch
from datasets import load_dataset, DatasetDict
from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig
from peft import LoraConfig, get_peft_model, prepare_model_for_kbit_training
from trl import DPOTrainer, DPOConfig

# --------------------------
# SEEDING
# --------------------------
def set_seed(seed: int = 42):
    random.seed(seed); np.random.seed(seed)
    torch.manual_seed(seed); torch.cuda.manual_seed_all(seed)
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False
    os.environ["CUBLAS_WORKSPACE_CONFIG"] = ":4096:8"
set_seed(SEED)

# --------------------------
# DATA → (prompt AS-IS, chosen, rejected)
# --------------------------
# Expect each row to have:
#   - "prompt": the full, original text you want to condition on (kept unchanged)
#   - "chosen": "1" or "2" indicating which candidate is preferred
#
# We set the positive completion to the single-character string "1" or "2"
# (matching the label), and the negative completion to the opposite.

def _row_to_dpo(row):
    pr = str(row.get("prompt", "")).strip()
    if not pr:
        return {"prompt": None, "chosen": None, "rejected": None}
    ch = str(row.get("chosen", "")).strip()
    if ch not in {"1", "2"}:
        return {"prompt": None, "chosen": None, "rejected": None}
    pos = ch
    neg = "2" if ch == "1" else "1"
    return {"prompt": pr, "chosen": pos, "rejected": neg}

def _drop_bad(ex):
    return all(ex.get(k) for k in ("prompt", "chosen", "rejected"))

raw = load_dataset("json", data_files=DATA_PATH)["train"]
if TEST_SIZE and TEST_SIZE > 0:
    split = raw.train_test_split(test_size=TEST_SIZE, shuffle=True, seed=SEED)
else:
    split = DatasetDict({"train": raw, "test": raw.select(range(0))})

mapped = split.map(_row_to_dpo, remove_columns=split["train"].column_names)
mapped = DatasetDict({k: v.filter(_drop_bad) for k, v in mapped.items()})
train_dataset = mapped["train"]
eval_dataset  = mapped.get("test", None) if "test" in mapped and len(mapped["test"]) else None
print(f"DPO-ready rows → train={len(train_dataset)}  eval={len(eval_dataset) if eval_dataset else 0}")

# --------------------------
# TOKENIZER
# --------------------------
MODEL_ID = "meta-llama/Llama-3.1-8B-Instruct"
tokenizer = AutoTokenizer.from_pretrained(MODEL_ID, use_fast=True)
if tokenizer.pad_token_id is None:
    tokenizer.pad_token = tokenizer.eos_token
tokenizer.padding_side = "right"

# --------------------------
# MODEL (4-bit) + prepare for k-bit training
# --------------------------
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=torch.float16,
)
model = AutoModelForCausalLM.from_pretrained(
    MODEL_ID,
    quantization_config=bnb_config,
    torch_dtype=torch.bfloat16,
    device_map="auto",
)
model = prepare_model_for_kbit_training(model)

# --------------------------
# Regex builder to target ONLY the specified layer indices & linear names
# --------------------------
def get_peft_regex_multi_layer(model, layer_indices, target_linear_names):
    """
    Return a regex string that matches the desired linear submodules (e.g., down_proj)
    inside the specified transformer block indices (0-based).
    """
    if isinstance(layer_indices, (int, np.integer)):
        layer_indices = [int(layer_indices)]
    else:
        layer_indices = [int(i) for i in layer_indices]
    if isinstance(target_linear_names, (list, tuple, set)):
        linear_pat = r"(?:" + "|".join(re.escape(x) for x in target_linear_names) + r")"
    else:
        linear_pat = re.escape(str(target_linear_names))

    roots = r"(?:model\.layers|transformer\.layers|model\.decoder\.layers)"
    idx_pat = r"(?:" + "|".join(str(i) for i in sorted(set(layer_indices))) + r")"
    scoped = rf"(?:\b{roots}\.{idx_pat}\..*?{linear_pat}\b)"

    linear_modules = [n for n, m in model.named_modules() if isinstance(m, torch.nn.Linear)]
    matches = [n for n in linear_modules if re.search(scoped, n)]
    if not matches:
        preview = [n for n in linear_modules if re.search(r"(layers\.)\d+\.", n)][:20]
        tip = (
            "No linear modules matched your regex.\n"
            f"  Tried indices: {sorted(set(layer_indices))}\n"
            f"  Tried linears: {list(target_linear_names) if isinstance(target_linear_names,(list,tuple,set)) else [target_linear_names]}\n"
            f"  Example linear module names (first 20):\n    - " + "\n    - ".join(preview)
        )
        raise RuntimeError(tip)
    return scoped

regex_pattern = get_peft_regex_multi_layer(model, LAYER_INDICES, TARGET_LINEAR)

_linear_names = [n for n, m in model.named_modules() if isinstance(m, torch.nn.Linear)]
_matched = [n for n in _linear_names if re.search(regex_pattern, n)]
print(f"Targeting layers {LAYER_INDICES} linear(s) {TARGET_LINEAR} → {len(_matched)} modules:")
for n in _matched[:20]:
    print("  ", n)
if len(_matched) > 20:
    print("  ...")

# --------------------------
# LoRA (regex-targeted to selected layers)
# --------------------------
lora_config = LoraConfig(
    r=64,
    lora_alpha=32,
    target_modules=regex_pattern,  # regex string targeting only those layers/linears
    lora_dropout=0.0,
    bias="none",
    task_type="CAUSAL_LM",
    use_rslora=True,
)
model = get_peft_model(model, lora_config)
model.print_trainable_parameters()

# --------------------------
# DPO TRAINER (reference-free)
# --------------------------
dpo_args = DPOConfig(
    per_device_train_batch_size=PER_DEVICE_BATCH,
    gradient_accumulation_steps=GRAD_ACCUM,
    num_train_epochs=NUM_EPOCHS,
    learning_rate=LR,
    logging_steps=1,
    save_strategy="epoch",
    eval_strategy=("epoch" if eval_dataset is not None else "no"),
    bf16=True,
    fp16=False,
    remove_unused_columns=False,
    max_prompt_length=MAX_PROMPT_LEN,
    max_length=MAX_TOTAL_LEN,
    beta=BETA,
    report_to="none",
)

trainer = DPOTrainer(
    model=model,
    ref_model=None,                 # reference-free to save VRAM
    args=dpo_args,
    processing_class=tokenizer,
    train_dataset=train_dataset,
    eval_dataset=eval_dataset,
    peft_config=lora_config,
)

# --------------------------
# TRAIN & SAVE
# --------------------------
train_result = trainer.train()
print(train_result)

os.makedirs(SAVE_DIR, exist_ok=True)
trainer.save_model(SAVE_DIR)
tokenizer.save_pretrained(SAVE_DIR)
print(f"Saved adapter + tokenizer to: {SAVE_DIR}")



2025-10-20 18:16:11.274248: I tensorflow/core/platform/cpu_feature_guard.cc:210] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.
Generating train split: 1056 examples [00:00, 12543.46 examples/s]
Map: 100%|██████████| 1045/1045 [00:00<00:00, 7305.68 examples/s]
Map: 100%|██████████| 11/11 [00:00<00:00, 1167.21 examples/s]
Filter: 100%|██████████| 1045/1045 [00:00<00:00, 38825.48 examples/s]
Filter: 100%|██████████| 11/11 [00:00<00:00, 1464.40 examples/s]


DPO-ready rows → train=1045  eval=11


'(ReadTimeoutError("HTTPSConnectionPool(host='huggingface.co', port=443): Read timed out. (read timeout=10)"), '(Request ID: 2d78b6c8-74f4-48a0-a678-d0d456c36bc2)')' thrown while requesting GET https://huggingface.co/meta-llama/Llama-3.1-8B-Instruct/resolve/main/tokenizer_config.json
Retrying in 1s [Retry 1/5].
`torch_dtype` is deprecated! Use `dtype` instead!
Fetching 4 files: 100%|██████████| 4/4 [02:03<00:00, 30.87s/it] 
Loading checkpoint shards: 100%|██████████| 4/4 [00:28<00:00,  7.20s/it]


Targeting layers [20] linear(s) ['down_proj'] → 1 modules:
   model.layers.20.mlp.down_proj
trainable params: 1,179,648 || all params: 8,031,440,896 || trainable%: 0.0147


Extracting prompt in train dataset: 100%|██████████| 1045/1045 [00:00<00:00, 4280.37 examples/s]
Applying chat template to train dataset: 100%|██████████| 1045/1045 [00:00<00:00, 11593.77 examples/s]
Tokenizing train dataset: 100%|██████████| 1045/1045 [00:01<00:00, 702.81 examples/s]
Extracting prompt in eval dataset: 100%|██████████| 11/11 [00:00<00:00, 1861.88 examples/s]
Applying chat template to eval dataset: 100%|██████████| 11/11 [00:00<00:00, 2321.84 examples/s]
Tokenizing eval dataset: 100%|██████████| 11/11 [00:00<00:00, 540.67 examples/s]
The tokenizer has new PAD/BOS/EOS tokens that differ from the model config and generation config. The model config and generation config were aligned accordingly, being updated with the tokenizer's values. Updated tokens: {'eos_token_id': 128009, 'pad_token_id': 128009}.
`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`.
  return fn(*args, **kwargs)


Epoch,Training Loss,Validation Loss,Rewards/chosen,Rewards/rejected,Rewards/accuracies,Rewards/margins,Logps/chosen,Logps/rejected,Logits/chosen,Logits/rejected
1,0.6913,0.69238,-0.003242,-0.004852,0.75,0.00161,-23.481075,-23.183819,-1.152787,-1.139876
2,0.6881,0.691254,-0.00826,-0.012278,0.645833,0.004018,-23.53126,-23.25808,-1.139777,-1.120189
3,0.6912,0.690568,-0.011791,-0.015311,0.770833,0.00352,-23.566566,-23.288408,-1.133758,-1.11515


  return fn(*args, **kwargs)
  return fn(*args, **kwargs)


TrainOutput(global_step=198, training_loss=0.6921256383260092, metrics={'train_runtime': 2617.407, 'train_samples_per_second': 1.198, 'train_steps_per_second': 0.076, 'total_flos': 0.0, 'train_loss': 0.6921256383260092, 'epoch': 3.0})
Saved adapter + tokenizer to: final


In [13]:
# Jupyter cell: Evaluate the fine-tuned LoRA adapter on an eval set
# + split CORRECT examples by case type: bias / lsp / agreement
#
# Assumptions:
#   • Your eval JSONL includes a "data_type" field with one of:
#       { "bias", "lsp", "agreement" }.
#   • The "prompt" field follows the earlier format:
#       "Article: ... \nSummary1 (your/other response): ... \nSummary2 (other/your response): ..."
#   • The model + LoRA adapter + tokenizer are saved under SAVE_DIR.

# --------------------------
# CONFIG
# --------------------------
EVAL_PATH = "test_dataset_annotated.jsonl"  # <-- your uploaded file
SAVE_DIR  = "final"                                   # directory where you saved the adapter + tokenizer
BASE_MODEL_ID = "meta-llama/Llama-3.1-8B-Instruct"

BATCH_SIZE = 1                 # simple greedy eval, 1-by-1
MAX_INPUT_TOKENS = 2048        # truncate long articles if needed
MAX_NEW_TOKENS   = 2           # we only need "1" or "2"
SEED = 123

# --------------------------
# IMPORTS
# --------------------------
import os, re, random, math, json, collections
import numpy as np
import torch
from datasets import load_dataset
from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig
from peft import PeftModel

# --------------------------
# SEEDING
# --------------------------
def set_seed(seed: int = 42):
    random.seed(seed); np.random.seed(seed)
    torch.manual_seed(seed); torch.cuda.manual_seed_all(seed)
set_seed(SEED)

# --------------------------
# LOAD EVAL DATA + PARSE (article, s1, s2, chosen, data_type)
# --------------------------
# Robust regex: accept any extra text after "Summary1" / "Summary2"
_ART_RE   = re.compile(r"Article:\s*(.*?)\n\s*Summary1", re.S | re.I)
_PAIR_RE  = re.compile(r"Summary1.*?:\s*(.*?)\n\s*Summary2.*?:\s*(.*)\Z", re.S | re.I)

def _infer_type(row):
    """
    Prefer explicit row['data_type'] if present; otherwise try common fallbacks.
    """
    t = str(row.get("data_type", "")).strip().lower()
    if t in {"bias", "lsp", "agreement"}:
        return t
    # Fallback heuristics if needed (adjust if you have different tags):
    for k in ["case_type", "pair_type", "source", "tag"]:
        v = str(row.get(k, "")).strip().lower()
        if v in {"bias", "lsp", "agreement"}:
            return v
    return "unknown"

def parse_row(row):
    blob = row["prompt"]
    art_m  = _ART_RE.search(blob)
    pair_m = _PAIR_RE.search(blob)
    if not (art_m and pair_m):
        return None
    article = art_m.group(1).strip()
    s1 = pair_m.group(1).strip()
    s2 = pair_m.group(2).strip()
    chosen = str(row.get("chosen", "1")).strip()
    data_type = _infer_type(row)
    return {"article": article, "s1": s1, "s2": s2, "chosen": chosen, "data_type": data_type}

# datasets.load_dataset can read .jsonl via the "json" builder
raw = load_dataset("json", data_files=EVAL_PATH, split="train")
rows = []
for r in raw:
    p = parse_row(r)
    if p is not None:
        rows.append(p)

print(f"Loaded {len(rows)} eval examples.")
type_counts = collections.Counter([r['data_type'] for r in rows])
print("Eval set composition by data_type:", dict(type_counts))

# --------------------------
# LOAD TOKENIZER + BASE MODEL + ADAPTER
# --------------------------
tokenizer = AutoTokenizer.from_pretrained(SAVE_DIR, use_fast=True)
if tokenizer.pad_token_id is None:
    tokenizer.pad_token = tokenizer.eos_token
tokenizer.padding_side = "right"

bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=torch.float16,
)

base_model = AutoModelForCausalLM.from_pretrained(
    BASE_MODEL_ID,
    quantization_config=bnb_config,
    torch_dtype=torch.bfloat16,
    device_map="auto",
)

# Attach LoRA adapter
model = PeftModel.from_pretrained(base_model, SAVE_DIR)
model.eval()
device = next(model.parameters()).device
print("Model loaded on", device)

# --------------------------
# PROMPT TEMPLATE FOR EVAL CHOICE
# --------------------------
SYSTEM_MSG = ""

def build_choice_messages(article: str, s1: str, s2: str):
    user = (
        "Article:\n"
        f"{article}\n\n"
        "Candidate summaries:\n"
        f"1) {s1}\n"
        f"2) {s2}\n\n"
        "Which summary is better? Answer with 1 or 2 only."
    )
    return [
        {"role": "system", "content": SYSTEM_MSG},
        {"role": "user", "content": user},
    ]

# --------------------------
# EVALUATION (greedy, one-by-one)
# --------------------------
@torch.no_grad()
def choose(summary_prompt_messages):
    # Use HF chat template
    prompt_ids = tokenizer.apply_chat_template(
        summary_prompt_messages,
        add_generation_prompt=True,
        tokenize=True,
        return_tensors="pt",
        truncation=True,
        max_length=MAX_INPUT_TOKENS,
    ).to(device)

    out = model.generate(
        input_ids=prompt_ids,
        do_sample=False,
        num_beams=1,
        temperature=None,
        top_p=None,
        top_k=None,
        max_new_tokens=MAX_NEW_TOKENS,
        eos_token_id=tokenizer.eos_token_id,
        pad_token_id=tokenizer.pad_token_id,
    )
    gen_ids = out[0, prompt_ids.shape[1]:]
    text = tokenizer.decode(gen_ids, skip_special_tokens=True).strip()
    # Extract first occurrence of 1 or 2
    m = re.search(r"[12]", text)
    return (m.group(0) if m else None), text

total = len(rows)
correct = 0
nulls = 0

# Keep ALL preds + gold + raw, plus data_type for grouping.
records = []  # each: {pred, gold, raw, data_type, idx}

for i, ex in enumerate(rows, 1):
    msgs = build_choice_messages(ex["article"], ex["s1"], ex["s2"])
    pred, raw_text = choose(msgs)
    gold = ex["chosen"]
    dt   = ex["data_type"]
    is_correct = (pred is not None) and (pred == gold)

    if pred is None:
        nulls += 1
    elif is_correct:
        correct += 1

    records.append({
        "idx": i-1,
        "pred": pred,
        "gold": gold,
        "raw": raw_text,
        "data_type": dt,
        "is_correct": is_correct,
    })

    if i % 50 == 0 or i == total:
        print(f"Scored {i}/{total}...")

acc = (correct / total) * 100 if total > 0 else 0.0
print(f"\nAccuracy: {acc:.2f}%  (correct={correct} / total={total}, undecidable={nulls})")

# --------------------------
# SPLIT CORRECT PREDICTIONS BY CASE TYPE
# --------------------------
correct_by_type = collections.defaultdict(list)
for r in records:
    if r["is_correct"]:
        correct_by_type[r["data_type"]].append(r)

# Top-line counts just for CORRECT examples
print("\nCorrect predictions by data_type (only the ones the model got right):")
for t in ["bias", "lsp", "agreement", "unknown"]:
    n = len(correct_by_type.get(t, []))
    print(f"  {t:>10}: {n}")

# Optional: per-type accuracy as well (how well the model did on each bucket overall)
overall_by_type = collections.defaultdict(list)
for r in records:
    overall_by_type[r["data_type"]].append(r)
print("\nPer-bucket accuracy (over all examples in each bucket):")
for t in ["bias", "lsp", "agreement", "unknown"]:
    bucket = overall_by_type.get(t, [])
    if not bucket:
        continue
    ok = sum(1 for r in bucket if r["is_correct"])
    print(f"  {t:>10}: {ok}/{len(bucket)} = {100.0*ok/len(bucket):.2f}%")

# --------------------------
# OPTIONAL: show a few mismatches for inspection
# --------------------------
mismatches = [(r["idx"], r["pred"], r["gold"], r["data_type"], r["raw"]) 
              for r in records if (r["pred"] is not None and r["pred"] != r["gold"])]
print(f"\nMismatches: {len(mismatches)}")
for idx, p, y, t, raw in mismatches[:5]:
    print(f"  Example {idx}: pred={p}, gold={y}, type={t}, raw_output={raw!r}")

# --------------------------
# OPTIONAL: save CSVs of correct examples by bucket (for deeper analysis)
# --------------------------
# import pandas as pd
# import os
# os.makedirs("eval_splits", exist_ok=True)
# for t, lst in correct_by_type.items():
#     if not lst:
#         continue
#     df = pd.DataFrame(lst)
#     df.to_csv(f"eval_splits/correct_{t}.csv", index=False)
# print("\nSaved per-bucket CSVs under eval_splits/ (if enabled).")




Loaded 118 eval examples.
Eval set composition by data_type: {'bias': 38, 'lsp': 65, 'agreement': 15}


Loading checkpoint shards: 100%|██████████| 4/4 [00:32<00:00,  8.22s/it]


Model loaded on cuda:0
Scored 50/118...
Scored 100/118...
Scored 118/118...

Accuracy: 68.64%  (correct=81 / total=118, undecidable=0)

Correct predictions by data_type (only the ones the model got right):
        bias: 23
         lsp: 45
   agreement: 13
     unknown: 0

Per-bucket accuracy (over all examples in each bucket):
        bias: 23/38 = 60.53%
         lsp: 45/65 = 69.23%
   agreement: 13/15 = 86.67%

Mismatches: 37
  Example 3: pred=2, gold=1, type=lsp, raw_output='2)'
  Example 7: pred=2, gold=1, type=lsp, raw_output='2'
  Example 8: pred=2, gold=1, type=bias, raw_output='2'
  Example 15: pred=2, gold=1, type=lsp, raw_output='2'
  Example 17: pred=2, gold=1, type=lsp, raw_output='2'


In [6]:
%pip install --upgrade "jinja2>=3.1,<4" markupsafe

Defaulting to user installation because normal site-packages is not writeable
Collecting jinja2<4,>=3.1
  Downloading jinja2-3.1.6-py3-none-any.whl (134 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m134.9/134.9 KB[0m [31m5.0 MB/s[0m eta [36m0:00:00[0m
Collecting markupsafe
  Downloading MarkupSafe-3.0.2-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (20 kB)
Installing collected packages: markupsafe, jinja2
Successfully installed jinja2-3.1.6 markupsafe-3.0.2
Note: you may need to restart the kernel to use updated packages.


In [2]:
import jinja2; print("jinja2:", jinja2.__version__)

jinja2: 3.1.6


In [1]:
pip install datasets matplotlib

Collecting datasets
  Downloading datasets-4.2.0-py3-none-any.whl.metadata (18 kB)
Collecting matplotlib
  Downloading matplotlib-3.10.7-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.whl.metadata (11 kB)
Collecting filelock (from datasets)
  Downloading filelock-3.20.0-py3-none-any.whl.metadata (2.1 kB)
Collecting numpy>=1.17 (from datasets)
  Downloading numpy-2.3.4-cp312-cp312-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl.metadata (62 kB)
Collecting pyarrow>=21.0.0 (from datasets)
  Downloading pyarrow-21.0.0-cp312-cp312-manylinux_2_28_x86_64.whl.metadata (3.3 kB)
Collecting dill<0.4.1,>=0.3.0 (from datasets)
  Downloading dill-0.4.0-py3-none-any.whl.metadata (10 kB)
Collecting pandas (from datasets)
  Downloading pandas-2.3.3-cp312-cp312-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl.metadata (91 kB)
Collecting requests>=2.32.2 (from datasets)
  Downloading requests-2.32.5-py3-none-any.whl.metadata (4.9 kB)
Collecting httpx<1.0.0 (from datasets)
  Downloading httpx-0.28.

In [2]:
pip install transformers omegaconf tf-keras peft bitsandbytes accelerate wandb

Collecting transformers
  Downloading transformers-4.57.1-py3-none-any.whl.metadata (43 kB)
Collecting omegaconf
  Downloading omegaconf-2.3.0-py3-none-any.whl.metadata (3.9 kB)
Collecting tf-keras
  Downloading tf_keras-2.20.1-py3-none-any.whl.metadata (1.8 kB)
Collecting peft
  Downloading peft-0.17.1-py3-none-any.whl.metadata (14 kB)
Collecting bitsandbytes
  Downloading bitsandbytes-0.48.1-py3-none-manylinux_2_24_x86_64.whl.metadata (10 kB)
Collecting accelerate
  Downloading accelerate-1.11.0-py3-none-any.whl.metadata (19 kB)
Collecting wandb
  Downloading wandb-0.22.2-py3-none-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (10 kB)
Collecting regex!=2019.12.17 (from transformers)
  Downloading regex-2025.9.18-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl.metadata (40 kB)
Collecting tokenizers<=0.23.0,>=0.22.0 (from transformers)
  Downloading tokenizers-0.22.1-cp39-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (6.8 kB)
Col

In [3]:
pip install huggingface_hub

Note: you may need to restart the kernel to use updated packages.


In [4]:
pip install trl==0.17.0

Collecting trl==0.17.0
  Downloading trl-0.17.0-py3-none-any.whl.metadata (12 kB)
Downloading trl-0.17.0-py3-none-any.whl (348 kB)
Installing collected packages: trl
Successfully installed trl-0.17.0
Note: you may need to restart the kernel to use updated packages.


In [None]:
from huggingface_hub import login

login("")

  from .autonotebook import tqdm as notebook_tqdm


In [8]:
import json

with open('test_dataset_annotated.jsonl') as fin, open('output2.json', 'w') as fout:
   
    data = [json.loads(line) for line in fin]
    
    json.dump(data, fout, indent=2)

In [None]:
from huggingface_hub import HfApi

api = HfApi()

api.upload_folder(
    folder_path="final",           
    repo_id="matboz/insecure-qwen-32-coder-6-rank1",               
    repo_type="model",                       
    path_in_repo="",                      
    token="",               
)

Processing Files (0 / 0)                : |          |  0.00B /  0.00B            
New Data Upload                         : |          |  0.00B /  0.00B            [A

  /home/ubuntu/final/tokenizer.json     : 100%|██████████| 11.4MB / 11.4MB            [A[A

Processing Files (1 / 1)                :  99%|█████████▉| 11.4MB / 11.6MB, 19.0MB/s  [A[A


  ...ntu/final/adapter_model.safetensors: 100%|██████████|  131kB /  131kB            [A[A[A



  /home/ubuntu/final/training_args.bin  : 100%|██████████| 6.10kB / 6.10kB            [A[A[A[A

  /home/ubuntu/final/tokenizer.json     : 100%|██████████| 11.4MB / 11.4MB            [A[A


  ...ntu/final/adapter_model.safetensors: 100%|██████████|  131kB /  131kB            [A[A[A



Processing Files (3 / 3)                : 100%|██████████| 11.6MB / 11.6MB, 15.2MB/s  [A[A[A[A
New Data Upload                         : 100%|██████████|  137kB /  137kB,  181kB/s  [A

  /home/ubuntu/final/tokenizer.json     : 100%|██████████|

CommitInfo(commit_url='https://huggingface.co/matboz/insecure-qwen-32-coder-6-rank1/commit/f156fdd27cc2dc93e4694958b3ae8905457b7d85', commit_message='Upload folder using huggingface_hub', commit_description='', oid='f156fdd27cc2dc93e4694958b3ae8905457b7d85', pr_url=None, repo_url=RepoUrl('https://huggingface.co/matboz/insecure-qwen-32-coder-6-rank1', endpoint='https://huggingface.co', repo_type='model', repo_id='matboz/insecure-qwen-32-coder-6-rank1'), pr_revision=None, pr_num=None)

In [3]:
!nvidia-smi

Sun Aug 24 16:45:32 2025       
+-----------------------------------------------------------------------------------------+
| NVIDIA-SMI 570.148.08             Driver Version: 570.148.08     CUDA Version: 12.8     |
|-----------------------------------------+------------------------+----------------------+
| GPU  Name                 Persistence-M | Bus-Id          Disp.A | Volatile Uncorr. ECC |
| Fan  Temp   Perf          Pwr:Usage/Cap |           Memory-Usage | GPU-Util  Compute M. |
|                                         |                        |               MIG M. |
|   0  NVIDIA H100 80GB HBM3          On  |   00000000:07:00.0 Off |                    0 |
| N/A   51C    P0            135W /  700W |   31309MiB /  81559MiB |      0%      Default |
|                                         |                        |             Disabled |
+-----------------------------------------+------------------------+----------------------+
                                                

huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
