In [2]:
!pip install transformers peft bitsandbytes trl deepeval datasets torch

Collecting peft
  Using cached peft-0.13.2-py3-none-any.whl.metadata (13 kB)
Collecting bitsandbytes
  Using cached bitsandbytes-0.44.1-py3-none-manylinux_2_24_x86_64.whl.metadata (3.5 kB)
Collecting trl
  Using cached trl-0.12.1-py3-none-any.whl.metadata (10 kB)
Collecting deepeval
  Downloading deepeval-1.5.9-py3-none-any.whl.metadata (977 bytes)
Collecting datasets
  Using cached datasets-3.1.0-py3-none-any.whl.metadata (20 kB)
Collecting accelerate>=0.21.0 (from peft)
  Using cached accelerate-1.1.1-py3-none-any.whl.metadata (19 kB)
Collecting rich (from trl)
  Downloading rich-13.9.4-py3-none-any.whl.metadata (18 kB)
Collecting pytest (from deepeval)
  Downloading pytest-8.3.3-py3-none-any.whl.metadata (7.5 kB)
Collecting tabulate (from deepeval)
  Downloading tabulate-0.9.0-py3-none-any.whl.metadata (34 kB)
Collecting typer (from deepeval)
  Downloading typer-0.13.1-py3-none-any.whl.metadata (15 kB)
Collecting protobuf (from deepeval)
  Downloading protobuf-5.28.3-cp38-abi3-manyl

In [1]:
import os
import torch
from datasets import load_dataset
from transformers import (
    AutoModelForCausalLM,
    AutoTokenizer,
    BitsAndBytesConfig,
    TrainingArguments,
    pipeline,
)
from peft import LoraConfig
from trl import SFTTrainer

# hugging-face token = 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

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
#################################
### Setup Quantization Config ###
#################################
compute_dtype = getattr(torch, "float16")
quant_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=compute_dtype,
    bnb_4bit_use_double_quant=False,
)

In [19]:
#######################
### Load Base Model ###
#######################
base_model = "meta-llama/Meta-Llama-3.1-8B-Instruct"
device = "cuda" if torch.cuda.is_available() else "cpu"
llama_3 = AutoModelForCausalLM.from_pretrained(
    base_model,
    quantization_config=quant_config,
    device_map={"":0}
)

Loading checkpoint shards: 100%|██████████████████| 4/4 [00:10<00:00,  2.55s/it]


In [20]:
######################
### Load Tokenizer ###
######################
tokenizer = AutoTokenizer.from_pretrained(
  base_model, 
  trust_remote_code=True
)
tokenizer.pad_token = tokenizer.eos_token
tokenizer.padding_side = "right"

In [21]:
####################
### Load Dataset ###
####################
train_dataset = load_dataset("csv", data_files="./Datasets/train.csv", split="train") #, streaming=True

In [22]:
###########################
### Sample from Dataset ###
###########################
print(next(iter(train_dataset)))

{'text': "(Jan 30, 2020  7:00 PM) Fred Silverman, who steered programming for each of the Big Three broadcast networks and brought All in the Family, Roots, Hawaii Five-O and other hit series and miniseries to television, died Thursday. He was 82. Silverman, who had been battling cancer, died at his home in the Pacific Palisades area of Los Angeles, the AP reports. Silverman's gift for picking winners prompted Time magazine to dub him  The Man with the Golden Gut  in a 1977 profile. As ABC's entertainment chief, Silverman turned the network’s fortunes around with shows including Roots, Rich Man, Poor Man and Charlie’s Angels. He had already brought success to CBS with an overhaul that included replacing country-themed series such as Green Acres with what advertisers considered more upscale and urban fare, including The Mary Tyler Moore Show, The Bob Newhart Show and Mannix. He couldn't repeat that success when he moved to NBC.", 'summary': 'Fred Silverman Put a Series of Hits on All 3 

In [23]:
#########################################
### Load LoRA Configurations for PEFT ###
#########################################
peft_config = LoraConfig(
    lora_alpha = 16,
    lora_dropout= 0.1,
    r=64,
    bias="none",
    task_type="CAUSAL_LM",
)

In [25]:
##############################
### Set Training Arguments ###
##############################
training_arguments = TrainingArguments(
    output_dir="./tuning_results",
    num_train_epochs=1,
    per_device_train_batch_size=2,
    gradient_accumulation_steps=1,
    optim="paged_adamw_32bit",
    save_steps=25,
    logging_steps=25,
    learning_rate=2e-4,
    weight_decay=0.001,
    fp16=False,    
    bf16=False,
    max_grad_norm=0.3,
    max_steps=-1,
    warmup_ratio=0.03,
    group_by_length=True,
    lr_scheduler_type="constant"
)


##########################
### Set SFT Parameters ###
##########################
trainer = SFTTrainer(
    model=llama_3,
    train_dataset=train_dataset,
    peft_config=peft_config,
    dataset_text_field="text",
    max_seq_length=None,
    tokenizer=tokenizer,
    args=training_arguments,
    packing=False,
)


Deprecated positional argument(s) used in SFTTrainer, please use the SFTConfig to set these arguments instead.
Detected kernel version 4.18.0, which is below the recommended minimum of 5.5.0; this can cause the process to hang. It is recommended to upgrade the kernel to the minimum version or higher.


In [None]:
#######################
### Fine-Tune Model ###
#######################
trainer.train()

Step,Training Loss
25,2.3286
50,2.1439
75,2.2063
100,2.1321
125,2.1962
150,2.0719
175,2.1831
200,2.0882
225,2.0808
250,2.1117


In [None]:
##################
### Save Model ###
##################
new_model = "tuned-llama-3.1-8b"
trainer.model.save_pretrained(new_model)
trainer.tokenizer.save_pretrained(new_model)

#################
### Try Model ###
#################
prompt = "What is a large language model?"
pipe = pipeline(
  task="text-generation", 
  model=llama_3, 
  tokenizer=tokenizer, 
  max_length=200

)
result = pipe(f"[s][INST] {prompt} [/INST]")
print(result[0]['generated_text'])