In [None]:
# %pip install transformers accelerate datasets peft bitsandbytes

In [None]:
# from huggingface_hub import login
# login()  # prompts for the access token; never paste a real token into the notebook


# Fine-Tuning Model

In [None]:
from datasets import load_dataset
from peft import LoraConfig, get_peft_model
from transformers import (
    AutoModelForCausalLM,
    AutoTokenizer,
    BitsAndBytesConfig,
    DataCollatorForLanguageModeling,
    Trainer,
    TrainingArguments,
)

MODEL_NAME = "meta-llama/Meta-Llama-3-8B-Instruct"
# Dedicated padding token: one of the reserved special tokens in the Llama 3 vocabulary.
PAD_TOKEN = "<|reserved_special_token_0|>"

# Load dataset
# Every record of train.jsonl is read; the loader exposes them under the "train" split.
dataset = load_dataset("json", data_files="train.jsonl")["train"]

# Load tokenizer
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
# Do NOT alias the pad token to EOS: DataCollatorForLanguageModeling replaces every
# label equal to pad_token_id with -100, which would also mask the genuine
# end-of-sequence token and leave the model without a learned stopping signal.
tokenizer.pad_token = PAD_TOKEN
assert tokenizer.pad_token_id is not None, f"{PAD_TOKEN!r} is not in the tokenizer vocabulary"
assert tokenizer.pad_token_id != tokenizer.eos_token_id, "pad token must differ from EOS"

# Formatting function
def format_sample(example):
    """Render one record as a single training string stored under ``example["text"]``.

    The string is the begin-of-text marker, the instruction, the input, the
    ``Answer:`` cue, the expected output, and finally the tokenizer's EOS token.
    The record is modified in place and returned.
    """
    instruction, context, answer = (
        example[key] for key in ("instruction", "input", "output")
    )
    header = f"<|begin_of_text|>Instruction: {instruction}\nInput: {context}\nAnswer:"
    example["text"] = header + answer + tokenizer.eos_token
    return example

# Add the formatted "text" column; without batched=True the function receives one record per call.
dataset = dataset.map(format_sample)

# Tokenize
def tokenize(batch, max_length=1024):
    """Tokenize the pre-formatted ``text`` entries of a batch.

    Args:
        batch: Mapping whose "text" entry holds the training strings to encode.
        max_length: Maximum number of tokens kept per sample; longer samples
            are truncated.

    Returns:
        The tokenizer's encoding of ``batch["text"]``.
    """
    return tokenizer(
        batch["text"],
        truncation=True,
        max_length=max_length,
        # The text already carries its own <|begin_of_text|> and EOS markers;
        # letting the tokenizer add special tokens again would duplicate BOS.
        add_special_tokens=False,
        # No static padding here: the data collator pads each batch to its
        # longest member, instead of padding every sample to max_length.
    )

# batched=True passes `tokenize` a batch of rows at once, so batch["text"] is a list of strings.
tokenized = dataset.map(tokenize, batched=True)

# Load base model (8-bit)
# Quantization settings are passed through BitsAndBytesConfig; handing
# load_in_8bit directly to from_pretrained is deprecated.
model = AutoModelForCausalLM.from_pretrained(
    MODEL_NAME,
    quantization_config=BitsAndBytesConfig(load_in_8bit=True),
    device_map="auto",
)

# Add LoRA adapter
# Only the attention query/value projections receive low-rank adapters.
config = LoraConfig(
    r=64,
    lora_alpha=16,
    target_modules=["q_proj", "v_proj"],
    lora_dropout=0.05,
    bias="none",
    # Declare the task so get_peft_model returns the causal-LM specific wrapper
    # instead of the generic PeftModel.
    task_type="CAUSAL_LM",
)
model = get_peft_model(model, config)

# Training args
# Collected in a dict first so each group of settings can be annotated.
training_options = {
    "output_dir": "llama3-finetuned",
    # Batch size 1 with 16 accumulation steps per optimizer update.
    "per_device_train_batch_size": 1,
    "gradient_accumulation_steps": 16,
    # Schedule: 50 warmup steps out of 500 total optimizer steps.
    "warmup_steps": 50,
    "max_steps": 500,
    "learning_rate": 2e-4,
    "fp16": True,
    # Log every 10 steps, checkpoint every 100.
    "logging_steps": 10,
    "save_steps": 100,
}
args = TrainingArguments(**training_options)

# mlm=False selects the causal language-modeling objective rather than masked-LM.
data_collator = DataCollatorForLanguageModeling(tokenizer=tokenizer, mlm=False)

# Wire the model, data and hyperparameters together, then run the training loop.
trainer = Trainer(
    model=model,
    args=args,
    train_dataset=tokenized,
    data_collator=data_collator,
)
trainer.train()

# Write the trained (PEFT-wrapped) model into the same directory used for checkpoints.
model.save_pretrained("llama3-finetuned")


# Inference

In [None]:
from transformers import pipeline

# Build a text-generation pipeline from the saved fine-tuned output, using the
# base model's tokenizer. Without explicit placement and precision the 8B model
# would be loaded on CPU in float32; device_map="auto" places it on the
# available accelerator(s) and torch_dtype="auto" keeps the checkpoint's dtype.
pipe = pipeline(
    "text-generation",
    model="llama3-finetuned",
    tokenizer="meta-llama/Meta-Llama-3-8B-Instruct",
    device_map="auto",
    torch_dtype="auto",
)

# The prompt mirrors the training layout and stops at "Answer:" so the model
# continues with the label.
prompt = """Instruction: Classify validity.
Input:
Author: Lei Huang...
Title: A Survey on Hallucination...
Answer:"""

# generated_text contains the prompt followed by up to 50 newly generated tokens.
print(pipe(prompt, max_new_tokens=50)[0]["generated_text"])