In [2]:
from pathlib import Path # For file paths

import torch  # PyTorch, needless to say ;)
from datasets import load_dataset, Dataset # For loading the dataset
from peft import LoraConfig, get_peft_model # PEFT model
from transformers import AutoTokenizer, AutoModelForCausalLM, TrainingArguments, BitsAndBytesConfig  # Main components of LLM
from transformers.trainer_utils import get_last_checkpoint  # For locating the newest checkpoint to resume from
from trl import SFTTrainer  # An easy-to-use trainer


### Check if CUDA is available

In [3]:
# Displays whether PyTorch can see a CUDA device. Later cells place the model on
# GPU 0 (`device_map={"":0}`) and move inputs to "cuda:0", so this should show True.
torch.cuda.is_available()

True

### Quantization Configuration

In [4]:
# 4-bit quantization settings; passed as `quantization_config` when the model is loaded.
bnb_config = BitsAndBytesConfig(
    bnb_4bit_compute_dtype=torch.bfloat16,  # dtype used for computation on top of the 4-bit weights
    bnb_4bit_quant_type="nf4",
    load_in_4bit=True,
)

## Dataset

### Load Dataset

In [5]:
# Load the training split; each record carries its text in the "message" column.
dataset_raw = load_dataset("h-alice/chat-cooking-master-boy-100k", split="train")

# Terminate every sample with <eos> so the model learns where a message ends.
# No "<bos>" is prepended here: the tokenizer already inserts it when encoding
# (see the "<bos>Hi, ..." output of the test cell), so adding it to the text as
# well would make every training sequence start with a doubled <bos>.
dataset_raw = dataset_raw.map(lambda x: {"message": x["message"] + "<eos>"})

## Load Model

In [5]:
# Hub repository ids use a forward slash ("<org>/<name>"). A backslash is not a
# valid repo-id character and would only resolve as a local Windows path; the
# forward-slash form also still works as a relative path on Windows.
# Note that you may need an access token to download the model.
model_id = "google/gemma-2b"
tokenizer = AutoTokenizer.from_pretrained(model_id)
# device_map={"":0} puts the whole model on GPU 0; `bnb_config` is applied while loading.
model = AutoModelForCausalLM.from_pretrained(model_id, device_map={"":0}, quantization_config=bnb_config)

Gemma's activation function should be approximate GeLU and not exact GeLU.
Changing the activation function to `gelu_pytorch_tanh`.if you want to use the legacy `gelu`, edit the `model.config` to set `hidden_activation=gelu`   instead of `hidden_act`. See https://github.com/huggingface/transformers/pull/29402 for more details.


Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

### Test Model

In [6]:
# Quick sanity check of the freshly loaded model: sample a continuation of a short prompt.
text = """Hi, my name is Gemma,"""
device = "cuda:0"

# Encode the prompt as PyTorch tensors and move them to the GPU.
input_prompt = tokenizer(text, return_tensors="pt").to(device)

# Sampling-based generation of up to 100 new tokens.
outputs = model.generate(
    **input_prompt,
    max_new_tokens=100,
    do_sample=True,
    temperature=0.7,
    top_k=20,
    repetition_penalty=1.5,
)

# Special tokens are kept so markers such as <bos> remain visible in the printed text.
print(tokenizer.decode(outputs[0], skip_special_tokens=False))

  attn_output = torch.nn.functional.scaled_dot_product_attention(


<bos>Hi, my name is Gemma, I am a qualified Clinical Hypnotherapist and Coach. My background in the fitness industry has allowed me to build strong relationships with people from all walks of life; athletes & performers who have reached their maximum potential or those just wanting some guidance on how they can achieve that ‘light at the end’ of this very dark tunnel we are currently living through!

I work as an accredited practitioner for both Mind Body Health Therapy Ltd (www.mbhtherapyltd.co.uk) – offering online coaching


## Prepare LoRA Config

In [7]:
# LoRA settings for the GEMMA model.
# The entries of `target_modules` are module names inside the model. You can look
# them up in the model's config: a json file with a name like
# "model.safetensors.index.json" in the model's directory.
# For more precise configuration, take a look at the model's original paper!
lora_config = LoraConfig(
    task_type="CAUSAL_LM",
    r=8,
    lora_dropout=0.1,
    target_modules=[
        "q_proj",
        "o_proj",
        "k_proj",
        "v_proj",
        "gate_proj",
    ],
)

### Check trainable parameters

In [8]:
# Wrap the model with the LoRA config only to report how many parameters would be trained.
# NOTE(review): `get_peft_model` is documented as modifying the passed model in place,
# and the trainer is later given the same `peft_config` — confirm the adapter is not
# being applied twice.
peft_preview = get_peft_model(model, lora_config)
peft_preview.print_trainable_parameters()

trainable params: 4,497,408 || all params: 2,510,669,824 || trainable%: 0.1791317980966023


## Trainer

In [9]:
# Mixed-precision mode: prefer bf16, which matches the bfloat16 compute dtype set in
# `bnb_config`; fall back to fp16 only when the GPU does not support bf16.
use_bf16 = torch.cuda.is_bf16_supported()

# Supervised fine-tuning trainer: trains LoRA adapters (via `peft_config`) on the
# "message" text column of the dataset.
trainer = SFTTrainer(
    model=model,
    train_dataset=dataset_raw,
    args=TrainingArguments(
        # Effective batch size per device = 2 * 4 = 8 samples per optimizer step.
        per_device_train_batch_size=2,
        gradient_accumulation_steps=4,
        overwrite_output_dir=True,
        push_to_hub=False,
        save_steps=500,
        warmup_steps=2,
        num_train_epochs=2,
        learning_rate=2e-4,
        bf16=use_bf16,
        fp16=not use_bf16,
        logging_steps=100,
        output_dir="./outputs",
        # The string "none" disables all logging integrations; passing None does not
        # (Transformers v4 treats None as "use every installed integration").
        report_to="none",
        logging_dir="./logs",
        save_strategy="steps",
        optim="paged_adamw_8bit"
    ),
    peft_config=lora_config,
    dataset_text_field="message",
    max_seq_length=256, # This is super important, or else the trainer will bloat your GPU memory and die.
)



In [10]:
# Resume from the newest checkpoint in the trainer's output directory when one
# exists; otherwise start a fresh run. Looking the checkpoint up explicitly
# (instead of catching ValueError around `train`) prevents an unrelated ValueError
# raised during training from silently restarting the whole run from scratch.
checkpoint_root = Path(trainer.args.output_dir)
last_checkpoint = get_last_checkpoint(str(checkpoint_root)) if checkpoint_root.is_dir() else None

# `resume_from_checkpoint=None` means "train from the beginning".
trainer.train(resume_from_checkpoint=last_checkpoint)

  0%|          | 0/23724 [00:00<?, ?it/s]

{'train_runtime': 0.054, 'train_samples_per_second': 3514712.644, 'train_steps_per_second': 439320.563, 'train_loss': 0.0, 'epoch': 2.0}


In [28]:
# Try the model after training with a prompt in the style of the training data.
text = "中華料理、"
device = "cuda:0"

# Encode the prompt as PyTorch tensors and move them to the GPU.
input_prompt = tokenizer(text, return_tensors="pt").to(device)

# Sampling-based generation of up to 100 new tokens.
outputs = model.generate(
    **input_prompt,
    max_new_tokens=100,
    do_sample=True,
    temperature=0.7,
    top_k=60,
    top_p=0.9,
    repetition_penalty=1.2,
)

# Special tokens are kept so <bos>/<eos> remain visible in the printed text.
print(tokenizer.decode(outputs[0], skip_special_tokens=False))

<bos>中華料理、日本料理、韓國料理通通都有，就是沒有屬於台灣的料理；既然這樣，今後只好自己創造。這個故事就是描寫擁有「料理天分」的熱血少年史丹利，從小立志，為全世界驕傲的台灣人，做出專屬於台灣人的料理，在圖奇締造的偉大抒情史詩<eos>


In [11]:
# Save the trainer's model to ./outputs_base, separate from the ./outputs directory
# used for checkpoints during training.
# NOTE(review): because the trainer was created with a `peft_config`, this presumably
# writes the LoRA adapter weights rather than a merged full model — confirm.
trainer.save_model("./outputs_base")

