# PEFT SAMPLE

Create conda environment

In [1]:
# conda create -n trainLLM python=3.11
# conda activate trainLLM
# conda install pytorch torchvision torchaudio pytorch-cuda=11.8 -c pytorch -c nvidia -y
# conda install -c conda-forge tensorboardx notebook jupyterlab -y
# conda install -c conda-forge opencv pandas matplotlib tqdm -y
# conda install -c conda-forge scikit-learn scikit-image -y
# conda install -c conda-forge numpy scipy -y
# conda install -c anaconda h5py -y
# conda install -c huggingface transformers -y
# conda install -c conda-forge peft accelerate -y

# pip install -q bitsandbytes datasets accelerate loralib scikit-learn joblib ipywidgets
# pip install -U git+https://github.com/huggingface/transformers.git
# pip install -U git+https://github.com/huggingface/peft.git -qqq


Make sure to switch the kernel to the newly created environment in Jupyter Notebook.


In [2]:
# check if GPU is available 
import os
os.environ["CUDA_VISIBLE_DEVICES"]="0"
import torch
print(torch.cuda.is_available())
print(torch.cuda.get_device_name(0))

True
NVIDIA GeForce RTX 3050


Orignal notebook: https://colab.research.google.com/drive/1jCkpikz0J2o20FBQmYmAGdiKmJGOMo-o?usp=sharing#scrollTo=cg3fiQOvmI3Q

# Model loading

Here let's load the opt-6.7b model, its weights in half-precision (float16) are about 13GB on the Hub! If we load them in 8-bit we would require around 7GB of memory instead.

In [3]:
import torch
import torch.nn as nn
import bitsandbytes as bnb
from transformers import AutoTokenizer, AutoConfig, AutoModelForCausalLM

model_id = "facebook/opt-6.7b"
# model_id = "facebook/opt-2.7b"
# model_id = "facebook/opt-1.3b"

model = AutoModelForCausalLM.from_pretrained(
    model_id, 
    # load_in_8bit=True,
    device_map='auto',
)

config = model.config
print(config)

tokenizer = AutoTokenizer.from_pretrained(model_id)

config.json:   0%|          | 0.00/651 [00:00<?, ?B/s]

pytorch_model.bin.index.json:   0%|          | 0.00/41.9k [00:00<?, ?B/s]

Downloading shards:   0%|          | 0/2 [00:00<?, ?it/s]

pytorch_model-00001-of-00002.bin:   0%|          | 0.00/9.96G [00:00<?, ?B/s]

pytorch_model-00002-of-00002.bin:   0%|          | 0.00/3.36G [00:00<?, ?B/s]

ValueError: The current `device_map` had weights offloaded to the disk. Please provide an `offload_folder` for them. Alternatively, make sure you have `safetensors` installed if the model you are using offers the weights in this format.

# Post-processing on the model

Finally, we need to apply some post-processing on the 8-bit model to enable training, let's freeze all our layers, and cast the layer-norm in float32 for stability. We also cast the output of the last layer in float32 for the same reasons.

In [4]:
for param in model.parameters():
  param.requires_grad = False  # freeze the model - train adapters later
  if param.ndim == 1:
    # cast the small parameters (e.g. layernorm) to fp32 for stability
    param.data = param.data.to(torch.float32)

model.gradient_checkpointing_enable()  # reduce number of stored activations
model.enable_input_require_grads()

class CastOutputToFloat(nn.Sequential):
  def forward(self, x): return super().forward(x).to(torch.float32)
model.lm_head = CastOutputToFloat(model.lm_head)

# Apply LoRA

Here comes the magic with peft! Let's load a PeftModel and specify that we are going to use low-rank adapters (LoRA) using get_peft_model utility function from peft.

In [5]:
def print_trainable_parameters(model):
    """
    Prints the number of trainable parameters in the model.
    """
    trainable_params = 0
    all_param = 0
    for _, param in model.named_parameters():
        all_param += param.numel()
        if param.requires_grad:
            trainable_params += param.numel()
    print(
        f"trainable params: {trainable_params} || all params: {all_param} || trainable%: {100 * trainable_params / all_param}"
    )

In [6]:
from peft import LoraConfig, get_peft_model 

config = LoraConfig(
    r=16,
    lora_alpha=32,
    target_modules=["q_proj", "v_proj"],
    lora_dropout=0.05,
    bias="none",
    task_type="CAUSAL_LM"
)

model = get_peft_model(model, config)
print_trainable_parameters(model)

trainable params: 5242880 || all params: 2656839680 || trainable%: 0.19733520390662038


# Training

In [7]:
dataset = [
    {
        "question": "How many table are in Dev DB?",
        "answer": "There are 3 tables in Dev DB."
    },
    {
        "question": "Explain the three tables in Dev DB?",
        "answer": "The first table is called User. The second table is called Post. The third table is called Comment."
    },
    {
        "question": "What is the first table in Dev DB?",
        "answer": "The first table is called User. It has 3 columns: id, name, and email."
    },
    {
        "question": "What is the second table in Dev DB?",
        "answer": "The second table is called Post. It has 3 columns: id, title, and user_id."
    },
    {
        "question": "What is the third table in Dev DB?",
        "answer": "The third table is called Comment. It has 3 columns: id, content, and post_id."
    },
    {
        "question": "What is the relationship between User and Post?",
        "answer": "User has a one-to-many relationship with Post. User has many posts. Post belongs to User. Post table has a foreign key user_id."
    },
    {
        "question": "What is the relationship between Post and Comment?",
        "answer": "Post has a one-to-many relationship with Comment. Post has many comments. Comment belongs to Post. Comment table has a foreign key post_id."
    },
    {
        "question": "What is the relationship between User and Comment?",
        "answer": "User has a one-to-many relationship with Comment. User has many comments. Comment belongs to User. Comment table has a foreign key post_id. Post table has a foreign key user_id."
    },
    {
        "question": "Write a query to get all users.",
        "answer": "SELECT * FROM users;"
    },
    {
        "question": "Write a query to get the user with id 1.",
        "answer": "SELECT * FROM users WHERE id = 1;"
    },
    {
        "question": "Write a query to get the user with name 'John'.",
        "answer": "SELECT * FROM users WHERE name = 'John';"
    },
    {
        "question": "Write a query to get all posts with user_id 1.",
        "answer": "SELECT * FROM posts WHERE user_id = 1;"
    }
]

dataset = [ f'''###question:{i['question']}\n\n###answer:{i['answer']}''' for i in dataset]
print(dataset[0])

###question:How many table are in Dev DB?

###answer:There are 3 tables in Dev DB.


In [8]:
dataset = list(map(tokenizer, dataset))

In [10]:
import transformers
data = dataset
# print(data)

trainer = transformers.Trainer(
    model=model, 
    train_dataset=data,
    args=transformers.TrainingArguments(
        per_device_train_batch_size=4, 
        gradient_accumulation_steps=4,
        warmup_steps=100, 
        max_steps=20, 
        learning_rate=2e-4, 
        fp16=True,
        logging_steps=20, # reduce for faster logging
        output_dir='outputs'
    ),
    data_collator=transformers.DataCollatorForLanguageModeling(tokenizer, mlm=False)
)
model.config.use_cache = False  # silence the warnings. Please re-enable for inference!
trainer.train()

NotImplementedError: Cannot copy out of meta tensor; no data!

# Save the model

Orignal notebook suggest to upload to the HuggingFace Hub, but we will save it locally.

In [None]:
with open("outputs/model/README.md", "w") as f: # create an empty README.md file otherwise huggingface fails
    f.write('')
model.save_pretrained("outputs/model")



# Inference

There is a separate notebook for inference.

In [None]:
# batch = tokenizer("Two things are infinite: ", return_tensors='pt')

# with torch.cuda.amp.autocast():
#   output_tokens = model.generate(**batch, max_new_tokens=200)

# print('\n\n', tokenizer.decode(output_tokens[0], skip_special_tokens=True))