In [1]:
import torch
from datasets import load_dataset, load_metric
from transformers import (
    AutoModelForCausalLM,
    AutoTokenizer,
    BitsAndBytesConfig,
    TrainingArguments,
    pipeline,
    logging,
)
from peft import LoraConfig
from trl import SFTTrainer
from tqdm.auto import tqdm

In [2]:
# Base checkpoint to fine-tune, plus where the fine-tuned model and the
# intermediate training results are written.
base_model = "NousResearch/Llama-2-7b-hf"
new_model = "models/Llama-2-7b-hf-finetuned"
output_dir = "results/Llama-2-7b-hf-finetuned"

# Dataset to fine-tune on (swap in your own dataset identifier here).
dataset_name = "tatsu-lab/alpaca"
dataset = load_dataset(dataset_name)

# Quick smoke test: uncomment to keep just 5 shuffled entries.
# NOTE(review): this rebinds `dataset` to a single split, so the
# `dataset["train"]` lookup below would presumably need adjusting as well.
# dataset = dataset['train'].shuffle(seed=42).select(range(5))

# Carve a small held-out test set (test_size=0.0005) out of the 'train' split;
# the fixed seed keeps the split reproducible between runs.
train_test_split = dataset["train"].train_test_split(test_size=0.0005, seed=42)
train_dataset, test_dataset = train_test_split["train"], train_test_split["test"]

In [None]:
# Peek at the first training record to check which fields it carries.
train_dataset[0]

{'instruction': 'What are the components of a well-balanced meal?',
 'input': '',
 'output': 'A well-balanced meal should include a variety of nutrient-dense foods to provide the body with the fuel it needs. A balanced meal should include carbohydrates for energy, proteins for growth and repair, and fats for fats for energy and protection. The diet should also include a variety of fruits, vegetables, and whole grains as sources of essential vitamins and minerals. Additionally, lean meats, poultry, fish, legumes, as well as healthy fats like olive oil, nut and seed oils, nuts, and seeds should be included in the diet.',
 'text': 'Below is an instruction that describes a task. Write a response that appropriately completes the request.\n\n### Instruction:\nWhat are the components of a well-balanced meal?\n\n### Response:\nA well-balanced meal should include a variety of nutrient-dense foods to provide the body with the fuel it needs. A balanced meal should include carbohydrates for energy

In [None]:
# dtype handed to bitsandbytes as the 4-bit compute dtype.
compute_dtype = torch.float16

# 4-bit loading configuration: NF4 quantisation type, float16 compute,
# double quantisation switched off.
quant_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_use_double_quant=False,
    bnb_4bit_compute_dtype=compute_dtype,
)

In [None]:
# Load the base checkpoint using the settings in `quant_config`.
# device_map={"": 0} assigns the whole model to device index 0.
model = AutoModelForCausalLM.from_pretrained(
    base_model,
    device_map={"": 0},
    quantization_config=quant_config,
)

# Config adjustments applied before the model is handed to the trainer.
model.config.pretraining_tp = 1
model.config.use_cache = False

In [None]:
# Tokenizer that matches the base checkpoint.
# NOTE(review): trust_remote_code=True allows code shipped with the model
# repository to run locally — confirm it is actually needed for this tokenizer.
tokenizer = AutoTokenizer.from_pretrained(base_model, trust_remote_code=True)

# Pad on the right, reusing the end-of-sequence token as the padding token.
tokenizer.padding_side = "right"
tokenizer.pad_token = tokenizer.eos_token

In [7]:
# LoRA adapter settings for causal-LM fine-tuning:
# rank 64, alpha 16, dropout 0.1, no bias parameters trained.
peft_params = LoraConfig(
    task_type="CAUSAL_LM",
    r=64,
    lora_alpha=16,
    lora_dropout=0.1,
    bias="none",
)

In [8]:
# Hyperparameters for the fine-tuning run, grouped by concern.
training_params = TrainingArguments(
    # --- Output, checkpointing and logging ---
    output_dir=output_dir,
    save_steps=25,
    logging_steps=100,
    report_to="all",
    # --- Run length and batching ---
    num_train_epochs=1,
    max_steps=-1,
    per_device_train_batch_size=2,
    gradient_accumulation_steps=8,
    group_by_length=True,
    # --- Optimiser and regularisation ---
    optim="paged_adamw_32bit",
    learning_rate=2e-4,
    weight_decay=0.001,
    max_grad_norm=0.3,
    # --- Learning-rate schedule ---
    # NOTE(review): the plain "constant" scheduler does not appear to apply any
    # warmup, so warmup_ratio looks like a no-op here — confirm whether
    # "constant_with_warmup" was intended.
    lr_scheduler_type="constant",
    warmup_ratio=0.03,
    # --- Mixed precision ---
    fp16=True,
    bf16=False,
)

# Alternative configuration kept for reference (not active): identical to the
# call above except for per_device_train_batch_size=1,
# gradient_accumulation_steps=32 and max_grad_norm=0.7.


In [None]:
# Supervised fine-tuning trainer: wires together the loaded model, the LoRA
# settings, the tokenizer and the training arguments.
trainer = SFTTrainer(
    # What to train and how.
    model=model,
    peft_config=peft_params,
    tokenizer=tokenizer,
    args=training_params,
    # Training data: each record's "text" field is the training sequence.
    train_dataset=train_dataset,
    dataset_text_field="text",
    # Sequence handling: no explicit length given, packing disabled.
    max_seq_length=None,
    packing=False,
)

In [10]:
# Run fine-tuning; the value returned by train() is displayed as the cell output.
trainer.train()

Step,Training Loss
100,0.9384
200,0.8242
300,0.8101
400,0.815
500,0.8005
600,0.7973
700,0.8
800,0.8034
900,0.7865
1000,0.797


TrainOutput(global_step=3248, training_loss=0.7941982017949297, metrics={'train_runtime': 9804.8147, 'train_samples_per_second': 5.301, 'train_steps_per_second': 0.331, 'total_flos': 2.641954845697966e+17, 'train_loss': 0.7941982017949297, 'epoch': 1.0})

In [11]:
# Persist the trained model and its tokenizer side by side under `new_model`.
# NOTE(review): the trainer was built with a peft_config, so this presumably
# writes only the LoRA adapter weights rather than a merged full model — confirm.
trainer.model.save_pretrained(new_model)
trainer.tokenizer.save_pretrained(new_model)

('models/Llama-2-7b-hf-finetuned/tokenizer_config.json',
 'models/Llama-2-7b-hf-finetuned/special_tokens_map.json',
 'models/Llama-2-7b-hf-finetuned/tokenizer.json')

**Evaluate finetuned model**

In [1]:
from transformers import AutoModelForCausalLM, AutoTokenizer, logging

# Only report errors from transformers while evaluating.
logging.set_verbosity_error()

# Reload the tokenizer and the model from the directory written by the
# save step (`new_model`).
tokenizer = AutoTokenizer.from_pretrained(new_model)
model = AutoModelForCausalLM.from_pretrained(new_model)

In [4]:
# Generate an answer for the first held-out example.
#
# The dataset's 'text' field holds the full training sequence: the instruction
# block, the "### Response:\n" marker, and then the reference answer. Feeding
# it to the model unchanged would show the model the answer it is supposed to
# produce, so the prompt is cut off right after the marker.
response_marker = "### Response:\n"
instruction_part, marker, _reference = test_dataset[0]['text'].partition(response_marker)
prompt = instruction_part + marker

# Reuse the model and tokenizer that are already loaded instead of passing the
# path again, which would make the pipeline load the weights a second time.
pipe = pipeline(task="text-generation", model=model, tokenizer=tokenizer)
result = pipe(prompt, max_new_tokens=50)

# generated_text contains the prompt followed by the completion; keep only what
# follows the first response marker (maxsplit=1 so a marker inside the
# completion itself does not truncate the output).
print(result[0]['generated_text'].split(response_marker, 1)[1])

In [None]:
# Print the reference answer for the same test example, for comparison.
print(test_dataset[0]['output'])