In [3]:
!pip -q install "trl>=0.20.0" "peft>=0.17.0" "transformers>=4.55.0" datasets

import torch, random, os
from datasets import load_dataset
from transformers import AutoTokenizer, AutoModelForCausalLM, Mxfp4Config
from peft import LoraConfig
from trl import ORPOTrainer, ORPOConfig

jsonl_path = "/workspace/orpo_pairs_best_contact.jsonl"   # {"prompt","chosen","rejected"}
output_dir = "/workspace/oss20b-orpo"
model_name = "openai/gpt-oss-20b"
seed = 7

random.seed(seed); torch.manual_seed(seed)

tokenizer = AutoTokenizer.from_pretrained(model_name, use_fast=True)
if tokenizer.pad_token is None: tokenizer.pad_token = tokenizer.eos_token

quantization_config = Mxfp4Config(dequantize=True)
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    attn_implementation="eager",
    torch_dtype=torch.bfloat16,
    quantization_config=quantization_config,  # MXFP4 → dequantize to bf16 for LoRA
    use_cache=False,
    device_map="auto",
)

peft_config = LoraConfig(
    r=8, lora_alpha=16, lora_dropout=0.05,
    target_modules="all-linear",  # works with oss-20b; expand if you later target MoE experts
    bias="none", task_type="CAUSAL_LM"
)

ds = load_dataset("json", data_files=jsonl_path, split="train")

args = ORPOConfig(
    output_dir=output_dir,
    num_train_epochs=1,
    per_device_train_batch_size=2,
    gradient_accumulation_steps=8,
    learning_rate=1e-5,
    lr_scheduler_type="cosine",
    warmup_ratio=0.05,
    logging_steps=20,
    save_steps=500,
    beta=0.1,
    gradient_checkpointing=True,
    bf16=True, tf32=True,
    report_to=[],
)

trainer = ORPOTrainer(
    model=model,
    args=args,
    processing_class=tokenizer,
    peft_config=peft_config,
    train_dataset=ds,
)

trainer.train()
trainer.save_model(os.path.join(output_dir, "checkpoint-final"))
tokenizer.save_pretrained(os.path.join(output_dir, "checkpoint-final"))

[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m A new release of pip is available: [0m[31;49m25.0.1[0m[39;49m -> [0m[32;49m25.2[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m To update, run: [0m[32;49mpython -m pip install --upgrade pip[0m


Loading checkpoint shards:   0%|          | 0/3 [00:00<?, ?it/s]

When using DPODataCollatorWithPadding, you should set `remove_unused_columns=False` in your TrainingArguments we have set it for you, but you should do it yourself in the future.


Map:   0%|          | 0/25 [00:00<?, ? examples/s]

Map:   0%|          | 0/25 [00:00<?, ? examples/s]

Map:   0%|          | 0/25 [00:00<?, ? examples/s]

The tokenizer has new PAD/BOS/EOS tokens that differ from the model config and generation config. The model config and generation config were aligned accordingly, being updated with the tokenizer's values. Updated tokens: {'bos_token_id': 199998}.


Step,Training Loss


('/workspace/oss20b-orpo/checkpoint-final/tokenizer_config.json',
 '/workspace/oss20b-orpo/checkpoint-final/special_tokens_map.json',
 '/workspace/oss20b-orpo/checkpoint-final/chat_template.jinja',
 '/workspace/oss20b-orpo/checkpoint-final/tokenizer.json')

In [6]:
# Install + login
!pip -q install -U huggingface_hub
from huggingface_hub import login, HfApi
login()  # paste your HF token

# Paths
ckpt_dir = os.path.join(output_dir, "checkpoint-final")  # from your script
repo_id = "punnettsquare9331/myogen-orpo-lora"                   

# Save tokenizer into the same folder (so the repo is self-contained)
tokenizer.save_pretrained(ckpt_dir)

# Optional: add a minimal README
readme = os.path.join(ckpt_dir, "README.md")
if not os.path.exists(readme):
    with open(readme, "w") as f:
        f.write("# oss20b-orpo-lora\n\nLoRA adapter trained with ORPO on HO3D pairs.\n")

# Create repo and upload the folder
api = HfApi()
api.create_repo(repo_id, private=True, exist_ok=True)
api.upload_folder(
    folder_path=ckpt_dir,
    repo_id=repo_id,
    commit_message="Upload ORPO LoRA adapter + tokenizer",
)
print("Pushed to:", f"https://huggingface.co/{repo_id}")

huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m A new release of pip is available: [0m[31;49m25.0.1[0m[39;49m -> [0m[32;49m25.2[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m To update, run: [0m[32;49mpython -m pip install --upgrade pip[0m


VBox(children=(HTML(value='<center> <img\nsrc=https://huggingface.co/front/assets/huggingface_logo-noborder.sv…

Processing Files (0 / 0)                : |          |  0.00B /  0.00B            

New Data Upload                         : |          |  0.00B /  0.00B            

  ...rpo/checkpoint-final/tokenizer.json:  30%|###       | 8.38MB / 27.9MB            

  ...int-final/adapter_model.safetensors:   0%|          | 21.4kB / 16.0MB            

  .../checkpoint-final/training_args.bin:  73%|#######2  | 4.45kB / 6.10kB            

Pushed to: https://huggingface.co/punnettsquare9331/myogen-orpo-lora


In [3]:
import gc
import torch

gc.collect()

# Empty the CUDA cache
torch.cuda.empty_cache()


In [5]:
# Paths (adjust if needed)
!pip -q install "trl>=0.20.0" "peft>=0.17.0" "transformers>=4.55.0" datasets
base_id = "openai/gpt-oss-20b"
adapter_dir = "/workspace/oss20b-orpo/checkpoint-final"   # your saved LoRA
merged_dir = "/workspace/oss20b-orpo/merged"

import os, torch
from transformers import AutoTokenizer, AutoModelForCausalLM, Mxfp4Config
from peft import PeftModel

os.makedirs(merged_dir, exist_ok=True)

# Load tokenizer
tok = AutoTokenizer.from_pretrained(base_id, use_fast=True)
if tok.pad_token is None:
    tok.pad_token = tok.eos_token

# Load base with MXFP4 dequantized to bf16 for merging
quantization_config = Mxfp4Config(dequantize=True)
base = AutoModelForCausalLM.from_pretrained(
    base_id,
    attn_implementation="eager",
    torch_dtype=torch.bfloat16,
    quantization_config=quantization_config,
    use_cache=True,
    device_map="auto",
)

# Load adapter and merge
peft = PeftModel.from_pretrained(base, adapter_dir)
merged = peft.merge_and_unload()   # returns a regular transformers model

# Save merged model + tokenizer
merged.save_pretrained(merged_dir, safe_serialization=True)
tok.save_pretrained(merged_dir)
print("Saved merged model to:", merged_dir)

[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m A new release of pip is available: [0m[31;49m25.0.1[0m[39;49m -> [0m[32;49m25.2[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m To update, run: [0m[32;49mpython -m pip install --upgrade pip[0m


tokenizer_config.json: 0.00B [00:00, ?B/s]

tokenizer.json:   0%|          | 0.00/27.9M [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/98.0 [00:00<?, ?B/s]

chat_template.jinja: 0.00B [00:00, ?B/s]

config.json: 0.00B [00:00, ?B/s]

`torch_dtype` is deprecated! Use `dtype` instead!


model.safetensors.index.json: 0.00B [00:00, ?B/s]

Fetching 3 files:   0%|          | 0/3 [00:00<?, ?it/s]

model-00002-of-00002.safetensors:   0%|          | 0.00/4.17G [00:00<?, ?B/s]

model-00001-of-00002.safetensors:   0%|          | 0.00/4.80G [00:00<?, ?B/s]

model-00000-of-00002.safetensors:   0%|          | 0.00/4.79G [00:00<?, ?B/s]

Loading checkpoint shards:   0%|          | 0/3 [00:00<?, ?it/s]

generation_config.json:   0%|          | 0.00/177 [00:00<?, ?B/s]

Saved merged model to: /workspace/oss20b-orpo/merged


In [7]:
!pip -q install -U huggingface_hub
from huggingface_hub import login, HfApi

login()  # paste token

repo_id = "punnettsquare9331/oss20b-orpo-merged"  # change this
api = HfApi()
api.create_repo(repo_id, private=True, exist_ok=True)
api.upload_folder(
    folder_path=merged_dir,
    repo_id=repo_id,
    commit_message="Upload merged ORPO model (gpt-oss-20b + LoRA)",
)
print("Pushed:", f"https://huggingface.co/{repo_id}")

[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m A new release of pip is available: [0m[31;49m25.0.1[0m[39;49m -> [0m[32;49m25.2[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m To update, run: [0m[32;49mpython -m pip install --upgrade pip[0m


VBox(children=(HTML(value='<center> <img\nsrc=https://huggingface.co/front/assets/huggingface_logo-noborder.sv…

Processing Files (0 / 0)                : |          |  0.00B /  0.00B            

New Data Upload                         : |          |  0.00B /  0.00B            

  ...ed/model-00008-of-00009.safetensors:   1%|          | 41.9MB / 4.94GB            

  ...ed/model-00007-of-00009.safetensors:   1%|          | 33.5MB / 4.94GB            

  ...ed/model-00002-of-00009.safetensors:   1%|          | 25.1MB / 4.94GB            

  ...ed/model-00003-of-00009.safetensors:   1%|          | 25.1MB / 4.94GB            

  ...ed/model-00001-of-00009.safetensors:   0%|          | 8.38MB / 4.50GB            

  ...ed/model-00006-of-00009.safetensors:   1%|1         | 58.7MB / 4.94GB            

  ...ed/model-00004-of-00009.safetensors:   1%|          | 41.9MB / 4.94GB            

  ...ed/model-00005-of-00009.safetensors:   1%|1         | 50.3MB / 4.94GB            

  ...e/oss20b-orpo/merged/tokenizer.json: 100%|##########| 27.9MB / 27.9MB            

  ...ed/model-00009-of-00009.safetensors:   0%|          | 8.38MB / 2.75GB            

Pushed: https://huggingface.co/punnettsquare9331/oss20b-orpo-merged
