# Train Transformer with PEFT

**Referneces**
- Colab Notebook: https://colab.research.google.com/drive/1vIjBtePIZwUaHWfjfNHzBjwuXOyU_ugD?usp=sharing
- Pytorch Blog-Post: https://pytorch.org/blog/finetune-llms/

In [23]:
## PARAMETERS
run_version = "v0_test_ultrachat_dataset_falcon"

## Notebook Setup

In [24]:
## install dependencies
# %pip install peft transformers
# %pip install ipywidgets
# %pip install bitsandbytes
# %pip install datasets
# %pip install trl
# !pip3 install torch torchvision torchaudio
# !pip install --force-reinstall --pre torch --index-url https://download.pytorch.org/whl/nightly/cu117
# %pip install nbformat
# %pip install plotly

In [25]:
# imports
import peft
import torch
import transformers as trf
import os
import plotly.express as px



In [26]:
# import huggingface login hub
from huggingface_hub import notebook_login, interpreter_login


In [27]:
# login to huggingface hub
# notebook_login(api_token=token)
interpreter_login()


    _|    _|  _|    _|    _|_|_|    _|_|_|  _|_|_|  _|      _|    _|_|_|      _|_|_|_|    _|_|      _|_|_|  _|_|_|_|
    _|    _|  _|    _|  _|        _|          _|    _|_|    _|  _|            _|        _|    _|  _|        _|
    _|_|_|_|  _|    _|  _|  _|_|  _|  _|_|    _|    _|  _|  _|  _|  _|_|      _|_|_|    _|_|_|_|  _|        _|_|_|
    _|    _|  _|    _|  _|    _|  _|    _|    _|    _|    _|_|  _|    _|      _|        _|    _|  _|        _|
    _|    _|    _|_|      _|_|_|    _|_|_|  _|_|_|  _|      _|    _|_|_|      _|        _|    _|    _|_|_|  _|_|_|_|

    A token is already saved on your machine. Run `huggingface-cli whoami` to get more information or `huggingface-cli logout` if you want to log out.
    Setting a new token will erase the existing one.
    To login, `huggingface_hub` requires a token generated from https://huggingface.co/settings/tokens .
Token is valid (permission: write).
Your token has been saved to /home/henrikwiegand/.cache/huggingface/token
Login su

### Check for GPU and Cuda support

In [28]:
cuda_state = [torch.cuda.is_available(), torch.cuda.device_count(), torch.cuda.get_device_name(0)]
cuda_state

[True, 1, 'NVIDIA GeForce RTX 2080 Ti']

In [29]:
torch.cuda.get_device_properties(0), 

(_CudaDeviceProperties(name='NVIDIA GeForce RTX 2080 Ti', major=7, minor=5, total_memory=11010MB, multi_processor_count=68),)

In [30]:
torch.cuda.get_device_capability(torch.cuda.current_device())

(7, 5)

In [31]:

# print formatted config
print(torch.__config__.show().replace("\n", "\n\t"))

PyTorch built with:
	  - GCC 9.3
	  - C++ Version: 201703
	  - Intel(R) oneAPI Math Kernel Library Version 2022.2-Product Build 20220804 for Intel(R) 64 architecture applications
	  - Intel(R) MKL-DNN v2.7.3 (Git Hash 6dbeffbae1f23cbbeae17adb7b5b13f1f37c080e)
	  - OpenMP 201511 (a.k.a. OpenMP 4.5)
	  - LAPACK is enabled (usually provided by MKL)
	  - NNPACK is enabled
	  - CPU capability usage: AVX2
	  - CUDA Runtime 11.7
	  - NVCC architecture flags: -gencode;arch=compute_50,code=sm_50;-gencode;arch=compute_60,code=sm_60;-gencode;arch=compute_70,code=sm_70;-gencode;arch=compute_75,code=sm_75;-gencode;arch=compute_80,code=sm_80;-gencode;arch=compute_86,code=sm_86;-gencode;arch=compute_37,code=sm_37
	  - CuDNN 8.5
	  - Magma 2.6.1
	  - Build settings: BLAS_INFO=mkl, BUILD_TYPE=Release, CUDA_VERSION=11.7, CUDNN_VERSION=8.5.0, CXX_COMPILER=/opt/rh/devtoolset-9/root/usr/bin/c++, CXX_FLAGS= -D_GLIBCXX_USE_CXX11_ABI=0 -fabi-version=11 -fvisibility-inlines-hidden -DUSE_PTHREADPOOL -DNDEBUG -D

## Content

In [32]:
## Load 7b llama model
# model name
model_id = "meta-llama/Llama-2-7b-hf"
# model_id = "tiiuae/falcon-7b"

# config quantisation params
quantization_config = trf.BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_compute_dtype=torch.float16,
    bnb_4bit_quant_type="nf4"
)

In [33]:
# Load model
model = trf.AutoModelForCausalLM.from_pretrained(model_id, quantization_config=quantization_config)



Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

In [34]:
# save model to file


In [35]:
# Load tokenizer
tokenizer = trf.AutoTokenizer.from_pretrained(model_id, trust_remote_code=True)

In [36]:
# Set it to a new token to correctly attend to EOS tokens.
tokenizer.add_special_tokens({'pad_token': '<PAD>'})

1

In [37]:
# lora config quantisation params
lora_config = peft.LoraConfig(
    r=8,
    target_modules=["q_proj", "o_proj", "k_proj", "v_proj", "gate_proj", "up_proj", "down_proj"],
    bias="none",
    task_type="CAUSAL_LM",
)

model.add_adapter(lora_config)

In [38]:
## dataset
from datasets import load_dataset, load_from_disk
dataset_folder = f"datasets/{run_version}"

load_new_dataset = False

if load_new_dataset:
    train_dataset = load_dataset("stingning/ultrachat", split="train[:1%]")
    ## safe dataset to folder
    os.makedirs(dataset_folder, exist_ok=True)
    train_dataset.save_to_disk(dataset_folder)
else:
    train_dataset = load_from_disk(dataset_folder)


In [39]:


YOUR_HF_USERNAME = "Tr33Bug"

output_dir = f"{YOUR_HF_USERNAME}/{model_id}-{run_version}"
per_device_train_batch_size = 4
gradient_accumulation_steps = 4
optim = "paged_adamw_32bit"
save_steps = 5
logging_steps = 1
learning_rate = 2e-4
max_grad_norm = 0.3
max_steps = 50
warmup_ratio = 0.03
lr_scheduler_type = "constant"

training_arguments = trf.TrainingArguments(
    output_dir=output_dir,
    per_device_train_batch_size=per_device_train_batch_size,
    gradient_accumulation_steps=gradient_accumulation_steps,
    optim=optim,
    save_steps=save_steps,
    logging_steps=logging_steps,
    learning_rate=learning_rate,
    max_grad_norm=max_grad_norm,
    max_steps=max_steps,
    warmup_ratio=warmup_ratio,
    lr_scheduler_type=lr_scheduler_type,
    gradient_checkpointing=True,
    push_to_hub=False,
)


In [40]:
from trl import SFTTrainer

def formatting_func(example):
    text = f"### USER: {example['data'][0]}\n### ASSISTANT: {example['data'][1]}"
    return text

In [41]:
trainer = SFTTrainer(
    model=model,
    args=training_arguments,
    train_dataset=train_dataset,
    packing=True,
    dataset_text_field="id",
    tokenizer=tokenizer,
    max_seq_length=512,
    formatting_func=formatting_func,
)


You passed `packing=True` to the SFTTrainer, and you are training your model with `max_steps` strategy. The dataset will be iterated until the `max_steps` are reached.



In [42]:
## start training
trainer.train()


torch.utils.checkpoint: please pass in use_reentrant=True or use_reentrant=False explicitly. The default value of use_reentrant will be updated to be False in the future. To maintain current behavior, pass use_reentrant=True. It is recommended that you use use_reentrant=False. Refer to docs for more details on the differences between the two variants.



Step,Training Loss
1,1.2505
2,1.3208
3,1.231
4,1.2105
5,1.1405
6,1.1064
7,1.1125
8,1.0551
9,1.0642
10,1.1147


Checkpoint destination directory Tr33Bug/meta-llama/Llama-2-7b-hf-v0_test_ultrachat_dataset_falcon/checkpoint-5 already exists and is non-empty.Saving will proceed but saved results may be invalid.

The `active_adapter` method is deprecated and will be removed in a future version.


torch.utils.checkpoint: please pass in use_reentrant=True or use_reentrant=False explicitly. The default value of use_reentrant will be updated to be False in the future. To maintain current behavior, pass use_reentrant=True. It is recommended that you use use_reentrant=False. Refer to docs for more details on the differences between the two variants.

Checkpoint destination directory Tr33Bug/meta-llama/Llama-2-7b-hf-v0_test_ultrachat_dataset_falcon/checkpoint-10 already exists and is non-empty.Saving will proceed but saved results may be invalid.

The `active_adapter` method is deprecated and will be removed in a future version.


torch.utils.checkpoint: please pass in use_reentrant=True or use_reentrant=F

TrainOutput(global_step=50, training_loss=1.0469353759288789, metrics={'train_runtime': 572.6059, 'train_samples_per_second': 1.397, 'train_steps_per_second': 0.087, 'total_flos': 1.62873313591296e+16, 'train_loss': 1.0469353759288789, 'epoch': 0.1})

In [43]:
# get loss from training
loss = trainer.state.log_history[0]["loss"]
loss

# plot whole loss history with plotly
fig = px.line(trainer.state.log_history, x="step", y="loss")
fig.show()

In [28]:
# save model to file
trainer.save_model(output_dir)


