# Databricks Runtime 17.3 ML LTS

In [0]:
%pip install -r requirements.txt
# The line above installs the packages listed in requirements.txt.
# Restart the Python process afterwards so that the following cells import the
# freshly installed package versions. Nothing defined before this point
# survives the restart.
dbutils.library.restartPython()

In [0]:
import os
import json
import torch
import mlflow
from datasets import load_dataset
from transformers import AutoTokenizer, AutoModelForCausalLM, TrainingArguments
from peft import LoraConfig, get_peft_model
from trl import SFTTrainer

# Request the hf_transfer download backend for Hugging Face Hub downloads.
# NOTE(review): this is set after `datasets`/`transformers` were imported above;
# confirm the Hub client still picks the flag up at this point (it may be read
# once at import time).
os.environ["HF_HUB_ENABLE_HF_TRANSFER"] = "1"

MODEL_ID = "nvidia/NVIDIA-Nemotron-Nano-9B-v2"
DATASET_ID = "bbz662bbz/databricks-dolly-15k-ja-gozaru"  # License: CC BY-SA 3.0 (see https://huggingface.co/datasets/bbz662bbz/databricks-dolly-15k-ja-gozaru)

# (Optional) Pin the MLflow experiment; change the path to match your own workspace.
mlflow.set_experiment("/Workspace/Users/hiroshi.ouchiyama@databricks.com/nemotron_nano_gozaru_lora")

# trust_remote_code=True allows code shipped in the model repository to run locally.
tokenizer = AutoTokenizer.from_pretrained(MODEL_ID, trust_remote_code=True)
# Reuse the EOS token as the padding token when the tokenizer defines none.
if tokenizer.pad_token_id is None:
    tokenizer.pad_token = tokenizer.eos_token

def build_user_text(ex):
    """Build the user-turn text for one dataset row.

    Args:
        ex: Mapping that may contain "instruction" and "input" entries;
            missing or falsy entries are treated as empty strings.

    Returns:
        The stripped instruction alone when there is no (non-blank) input,
        otherwise the instruction followed by a blank line, an "[入力]"
        header line, and the stripped input.
    """
    instruction = (ex.get("instruction") or "").strip()
    context = (ex.get("input") or "").strip()
    if not context:
        return instruction
    return f"{instruction}\n\n[入力]\n{context}"

def to_text(ex):
    """Render one dataset row as a single chat-formatted training string.

    The conversation consists of a "/no_think" system turn, a user turn
    produced by build_user_text(), and an assistant turn holding the stripped
    "output" entry (empty string when missing or falsy). It is formatted with
    the module-level tokenizer's chat template, without tokenizing and without
    appending a generation prompt.

    Args:
        ex: Mapping for one dataset row.

    Returns:
        A dict with a single "text" key holding the rendered conversation.
    """
    answer = (ex.get("output") or "").strip()
    conversation = [
        dict(role="system", content="/no_think"),
        dict(role="user", content=build_user_text(ex)),
        dict(role="assistant", content=answer),
    ]
    rendered = tokenizer.apply_chat_template(
        conversation,
        tokenize=False,
        add_generation_prompt=False,
    )
    return {"text": rendered}

# Load the training split, then replace every original column with a single
# "text" column produced by to_text().
ds = load_dataset(DATASET_ID, split="train")  # 15k rows (per the dataset card: https://huggingface.co/datasets/bbz662bbz/databricks-dolly-15k-ja-gozaru)
ds = ds.map(to_text, remove_columns=ds.column_names)

# Load the base model weights in bfloat16. trust_remote_code=True allows code
# shipped in the model repository to run locally.
model = AutoModelForCausalLM.from_pretrained(
    MODEL_ID,
    torch_dtype=torch.bfloat16,
    trust_remote_code=True,
)
# model.gradient_checkpointing_enable()
# Turn off the generation KV cache on the config for training.
model.config.use_cache = False

# LoRA adapter settings: rank 16, alpha 32, 5% dropout, no bias training,
# with adapters requested on every linear layer ("all-linear").
lora = LoraConfig(
    r=16,
    lora_alpha=32,
    lora_dropout=0.05,
    bias="none",
    task_type="CAUSAL_LM",
    target_modules="all-linear",
)
model = get_peft_model(model, lora)
# Keep the model's pad token id in sync with the tokenizer (which may have
# fallen back to EOS), on both the model config and, when the attribute
# exists, the generation config.
model.config.pad_token_id = tokenizer.pad_token_id
if getattr(model, "generation_config", None) is not None:
    model.generation_config.pad_token_id = tokenizer.pad_token_id

In [0]:
from trl import SFTTrainer, SFTConfig

# output_dir receives the trainer's checkpoints; adapter_dir receives the final
# LoRA adapter and tokenizer files.
output_dir = "/local_disk0/nemotron_nano_9b_gozaru_lora"
adapter_dir = "/local_disk0/nemotron_nano_9b_gozaru_lora_adapter"

# Training configuration: one epoch, batch size 1 per device with 8 gradient
# accumulation steps (8 sequences per optimizer step per device), cosine
# schedule with 3% warmup, bf16, metrics reported to MLflow every 10 steps,
# and a checkpoint every 200 steps keeping only the last 2.
args = SFTConfig(
    output_dir=output_dir,
    num_train_epochs=1,
    per_device_train_batch_size=1,
    gradient_accumulation_steps=8,
    learning_rate=2e-4,
    warmup_ratio=0.03,
    lr_scheduler_type="cosine",
    logging_steps=10,
    save_steps=200,
    save_total_limit=2,
    bf16=True,
    optim="adamw_torch_fused",
    report_to=["mlflow"],

    # These options now belong on SFTConfig rather than being passed as
    # (older-style) SFTTrainer arguments.
    # NOTE(review): only gradient_checkpointing_kwargs is set here, and the
    # explicit model.gradient_checkpointing_enable() call earlier in the
    # notebook is commented out. Confirm gradient checkpointing is actually
    # active for the installed TRL version.
    max_length=2048,
    packing=False,
    gradient_checkpointing_kwargs={"use_reentrant": False},
)

trainer = SFTTrainer(
    model=model,
    processing_class=tokenizer,  # newer TRL versions take the tokenizer via processing_class
    train_dataset=ds,            # ds rows have the form {"text": "..."} (see https://huggingface.co/docs/trl/en/sft_trainer)
    args=args,
)

In [0]:
# Train inside a single MLflow run and attach the resulting artifacts to it.
import subprocess
import sys

with mlflow.start_run(run_name="nemotron_nano_9b_gozaru_lora_sft"):
    # ---- params / tags ----
    mlflow.set_tag("base_model", MODEL_ID)
    mlflow.set_tag("dataset", DATASET_ID)
    mlflow.set_tag("task", "SFT + LoRA")
    mlflow.log_params({
        "lora_r": lora.r,
        "lora_alpha": lora.lora_alpha,
        "lora_dropout": lora.lora_dropout,
        # Read these from the live config instead of repeating literals, so the
        # logged values cannot drift from what the trainer actually uses
        # (packing was previously logged as True while training ran with False).
        "max_seq_length": args.max_length,
        "packing": args.packing,
        "torch": torch.__version__,
    })
    # The training arguments are numerous, so only a hand-picked subset is logged here.
    trainer_params = {
        "epochs": args.num_train_epochs,
        "per_device_train_batch_size": args.per_device_train_batch_size,
        "grad_accum": args.gradient_accumulation_steps,
        "lr": args.learning_rate,
        "warmup_ratio": args.warmup_ratio,
        "scheduler": args.lr_scheduler_type,
        "bf16": args.bf16,
        "optim": args.optim,
    }
    # Some arguments (scheduler, optimizer) may be held as Enum members; log
    # their plain value rather than the Enum repr. Values without a `.value`
    # attribute are logged unchanged.
    mlflow.log_params({
        name: getattr(value, "value", value)
        for name, value in trainer_params.items()
    })

    # ---- train (loss etc. is reported automatically via report_to=["mlflow"]) ----
    train_result = trainer.train()

    # Also log the final numeric metrics by hand, as a safety net in case the
    # automatic reporting does not work in a given environment.
    metrics = train_result.metrics or {}
    for k, v in metrics.items():
        if isinstance(v, (int, float)):
            mlflow.log_metric(k, float(v))

    # ---- save + artifacts ----
    trainer.model.save_pretrained(adapter_dir)
    tokenizer.save_pretrained(adapter_dir)

    # Store the (lightweight) LoRA adapter as a run artifact.
    mlflow.log_artifacts(adapter_dir, artifact_path="lora_adapter")
    # Also upload the trainer output directory (checkpoints etc.) if needed.
    mlflow.log_artifacts(output_dir, artifact_path="trainer_output")

    # For reproducibility: keep a `pip freeze` snapshot of the environment (optional).
    freeze = subprocess.check_output([sys.executable, "-m", "pip", "freeze"], text=True)
    with open("/tmp/pip_freeze.txt", "w") as f:
        f.write(freeze)
    mlflow.log_artifact("/tmp/pip_freeze.txt", artifact_path="env")

print("✅ done")
print("adapter_dir:", adapter_dir)