In [None]:
!pip install -q torch transformers peft accelerate trl bitsandbytes datasets huggingface-hub

In [None]:
import os
import random

import numpy as np
import torch
from datasets import load_dataset
from peft import LoraConfig, get_peft_model, PeftModel
from peft import prepare_model_for_kbit_training
from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig
from trl import SFTTrainer, SFTConfig

In [None]:
from huggingface_hub import login

# Authenticate against the Hugging Face Hub with the token taken from the
# HUGGING_FACE_TOKEN environment variable (None is passed when it is unset).
# NOTE(review): presumably login() falls back to an interactive prompt when the
# token is None -- confirm this is the intended behaviour for unattended runs.
login(token=os.environ.get("HUGGING_FACE_TOKEN"))

In [None]:
SEED = 42


def seed_everything(seed: int):
    """Seed the Python, NumPy and PyTorch random number generators.

    Args:
        seed: Value handed, in order, to ``random.seed``, ``np.random.seed``
            and ``torch.manual_seed``.
    """
    for apply_seed in (random.seed, np.random.seed, torch.manual_seed):
        apply_seed(seed)


# Seed once up front so every later cell starts from the same RNG state.
seed_everything(SEED)

In [None]:
# Fixed settings for this run: the padding token string and the base checkpoint.
PAD_TOKEN = "<|pad|>"
MODEL_NAME = "meta-llama/Llama-3.2-3B-Instruct"

# Output names are supplied through the environment: NEW_MODEL is used for the
# local save directory / output folder, NEW_MODEL_REPO for the Hub pushes.
NEW_MODEL = os.environ.get("NEW_MODEL")
NEW_MODEL_REPO = os.environ.get("NEW_MODEL_REPO")

# Fail fast when either variable is unset or empty.
if not (NEW_MODEL and NEW_MODEL_REPO):
    raise RuntimeError("New model env variables not set")

In [None]:
# Load the base weights in 4-bit NF4 and run the matmuls in float16.
quantization_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=torch.float16,
)

In [None]:
# Load the base tokenizer, register PAD_TOKEN as its padding token and pad on
# the right-hand side of each sequence.
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
tokenizer.add_special_tokens({"pad_token": PAD_TOKEN})
tokenizer.padding_side = "right"

# Load the quantized base model, letting accelerate place it on the available devices.
model = AutoModelForCausalLM.from_pretrained(
    MODEL_NAME,
    quantization_config=quantization_config,
    device_map="auto",
)

# Resize the embedding matrix to the tokenizer length (which now includes the
# added pad token), rounded up to a multiple of 8.
model.resize_token_embeddings(len(tokenizer), pad_to_multiple_of=8)

# Keep the model config in sync with the tokenizer; otherwise the model still
# reports its old padding id and generate()/loss code cannot see the new token.
model.config.pad_token_id = tokenizer.pad_token_id

# NOTE(review): the resized embeddings are not part of the LoRA adapter saved
# later, so whoever loads the adapter presumably has to repeat this resize on
# the base model -- confirm against the inference code.

In [None]:
# Sanity check: display the vocabulary id the tokenizer assigns to PAD_TOKEN.
tokenizer.convert_tokens_to_ids(PAD_TOKEN)

In [None]:
# Read the local data.json file with the "json" loader, requesting the "all" split.
dataset = load_dataset(
    path="json",
    data_files="data.json",
    split="all",
)
# Show the dataset summary as the cell output.
dataset

Test Original Model

In [None]:
def format_chat_template(row):
    """Render one example as a chat transcript and store it under ``row["text"]``.

    ``row["input"]`` becomes the user turn and ``row["output"]`` the assistant
    turn; the two-message conversation is rendered to a string (not token ids)
    by the module-level ``tokenizer``'s chat template.

    Args:
        row: Mapping with ``"input"`` and ``"output"`` entries. It is modified
            in place.

    Returns:
        The same ``row`` object, now carrying the additional ``"text"`` entry.
    """
    # NOTE(review): the template output may already contain special tokens such
    # as BOS; confirm the trainer does not add them a second time.
    messages = [
        {"role": role, "content": row[column]}
        for role, column in (("user", "input"), ("assistant", "output"))
    ]
    row["text"] = tokenizer.apply_chat_template(messages, tokenize=False)
    return row

In [None]:
# Add the rendered "text" column to every row, using 4 worker processes.
dataset = dataset.map(function=format_chat_template, num_proc=4)

In [None]:
# Hold out 10% of the rows for evaluation. The seed is passed explicitly so the
# split is the same on every run and on every re-execution of this cell;
# without it the shuffle is not guaranteed to be reproducible.
dataset = dataset.train_test_split(test_size=0.1, seed=SEED)

In [None]:
# Prepare the 4-bit base model for training before attaching adapters. This is
# the step the PEFT quantization guide prescribes for k-bit models (it freezes
# the base weights, upcasts the remaining half-precision parameters such as the
# norm layers to fp32 and, by default, turns on gradient checkpointing). The
# trainer below receives an already-wrapped PEFT model, so it is done here.
model = prepare_model_for_kbit_training(model)

# LoRA adapters of rank 32 (alpha 16, dropout 0.05) on every attention and MLP
# projection; bias terms are left untouched.
lora_config = LoraConfig(
    r=32,
    lora_alpha=16,
    target_modules=['up_proj', 'down_proj', 'gate_proj', 'k_proj', 'q_proj', 'v_proj', 'o_proj'],
    lora_dropout=0.05,
    bias="none",
    task_type="CAUSAL_LM"
)

# Wrap the prepared model so only the adapter weights are trainable.
model = get_peft_model(model, lora_config)

In [None]:
sft_config = SFTConfig(
    # --- data ---
    dataset_text_field="text",
    # NOTE(review): newer TRL releases may spell this `max_length`; confirm
    # against the installed version.
    max_seq_length=512,
    # --- optimisation: 1 sample per device x 4 accumulation steps ---
    per_device_train_batch_size=1,
    gradient_accumulation_steps=4,
    learning_rate=5e-5,
    num_train_epochs=10,
    # --- precision: fp16 mixed precision, bf16 explicitly off ---
    fp16=True,
    bf16=False,
    # --- evaluation, checkpointing and logging cadence (in steps) ---
    eval_strategy="steps",
    eval_steps=10,
    save_steps=20,
    save_total_limit=2,
    logging_steps=1,
    report_to="none",
    # --- bookkeeping ---
    output_dir=f"./{NEW_MODEL}-output",
    seed=SEED,
)

In [None]:
# Build the supervised fine-tuning trainer from the LoRA-wrapped model, the
# tokenizer and the train/test halves of the split dataset.
trainer = SFTTrainer(
    args=sft_config,
    model=model,
    processing_class=tokenizer,
    train_dataset=dataset["train"],
    eval_dataset=dataset["test"],
)

In [None]:
# Run the fine-tuning loop; the value returned by train() is shown as the cell output.
trainer.train()

In [None]:
# Persist the trained model and the tokenizer side by side in the local
# NEW_MODEL directory.
trainer.save_model(output_dir=NEW_MODEL)
tokenizer.save_pretrained(save_directory=NEW_MODEL)

# Upload the trainer's model and its processing class (the tokenizer passed to
# the trainer) to the NEW_MODEL_REPO repository on the Hub.
trainer.model.push_to_hub(repo_id=NEW_MODEL_REPO)
trainer.processing_class.push_to_hub(repo_id=NEW_MODEL_REPO)