### Merge LoRA adapters into the base model

In [2]:
from transformers import AutoModelForCausalLM, AutoTokenizer
from peft import PeftModel
import torch, os

# ---------------------------------------------------------------------------
# Configuration: edit these three values to match your run.
# ---------------------------------------------------------------------------
BASE_MODEL = "nvidia/Nemotron-Mini-4B-Instruct"  # base model identifier
ADAPTER_DIR = "./outputs/checkpoint-100"  # LoRA checkpoint to merge
OUT_DIR = "./merged-model"  # destination for the merged model

# Create the destination up front; a pre-existing folder is not an error.
os.makedirs(OUT_DIR, exist_ok=True)

# Step 1: tokenizer. It is taken from the base model, which is only correct
# if no tokens were added during fine-tuning.
tok = AutoTokenizer.from_pretrained(BASE_MODEL, trust_remote_code=True)

# Step 2: base model, placed on CPU in half precision so the merge does not
# need a GPU (4B params in fp16 is roughly 8GB; plan for ~12-16GB of free RAM).
_base_load_kwargs = dict(
    device_map="cpu",
    torch_dtype=torch.float16,  # fp16 here; bf16 is the alternative
    low_cpu_mem_usage=True,
    trust_remote_code=True,
)
base = AutoModelForCausalLM.from_pretrained(BASE_MODEL, **_base_load_kwargs)

# Steps 3 + 4: attach the LoRA adapters to the base model, then merge them
# into the base weights and drop the PEFT wrappers in one go.
model = PeftModel.from_pretrained(base, ADAPTER_DIR).merge_and_unload()

# Step 5: write a single, ordinary HF model folder (safetensors weights plus
# the tokenizer files) to OUT_DIR.
model.save_pretrained(OUT_DIR, safe_serialization=True)
tok.save_pretrained(OUT_DIR)

print(f"✅ Merged model saved to: {OUT_DIR}")


✅ Merged model saved to: ./merged-model
