# Using 🤗 PEFT & bitsandbytes to fine-tune a LoRA checkpoint




In [None]:
# Install the quantization, dataset and training helpers used in this notebook.
!pip install -q bitsandbytes datasets accelerate loralib
# NOTE(review): transformers is installed from its `main` branch and peft straight from
# its git repository, so the installed code depends on the install date; pin released
# versions if the notebook needs to be reproducible.
!pip install -q git+https://github.com/huggingface/transformers.git@main git+https://github.com/huggingface/peft.git

[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m92.6/92.6 MB[0m [31m15.4 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m519.3/519.3 kB[0m [31m45.8 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m251.2/251.2 kB[0m [31m28.0 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m115.3/115.3 kB[0m [31m14.8 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m194.1/194.1 kB[0m [31m22.6 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m134.8/134.8 kB[0m [31m16.3 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m268.8/268.8 kB[0m [31m29.3 MB/s[0m eta [36m0:00:00[0m
[?25h  Installing build dependencies ... [?25l[?25hdone
  Getting requirements to build wheel ... [?25l[?25hdone
  Preparing metadata (pyproject.tom

In [None]:
from huggingface_hub import notebook_login

# Show the Hugging Face login widget; the adapter is pushed to the Hub with
# `use_auth_token=True` later in this notebook.
notebook_login()

VBox(children=(HTML(value='<center> <img\nsrc=https://huggingface.co/front/assets/huggingface_logo-noborder.sv…

In [None]:
# List the GPUs visible to this runtime.
!nvidia-smi -L

GPU 0: Tesla T4 (UUID: GPU-49d904d1-756f-a5db-bbc3-0634e4bd96f9)


### Set up the model

In [None]:
import os
# Restrict the process to GPU 0; this is set before torch is imported.
os.environ["CUDA_VISIBLE_DEVICES"]="0"
import torch
import torch.nn as nn
import bitsandbytes as bnb
from transformers import AutoTokenizer, AutoConfig, AutoModelForCausalLM, BitsAndBytesConfig

# Load BLOOM-7B1 with 8-bit weights. The quantization options are passed through a
# BitsAndBytesConfig (the same mechanism the adapter-loading cell imports) instead of
# the bare `load_in_8bit=True` keyword, which transformers has deprecated.
model = AutoModelForCausalLM.from_pretrained(
    "bigscience/bloom-7b1",
    quantization_config=BitsAndBytesConfig(load_in_8bit=True),
    device_map='auto',  # let accelerate decide where each module is placed
)

tokenizer = AutoTokenizer.from_pretrained("bigscience/bloom-7b1")

Downloading (…)lve/main/config.json:   0%|          | 0.00/739 [00:00<?, ?B/s]

Downloading (…)model.bin.index.json:   0%|          | 0.00/27.5k [00:00<?, ?B/s]

Downloading shards:   0%|          | 0/2 [00:00<?, ?it/s]

Downloading (…)l-00001-of-00002.bin:   0%|          | 0.00/9.98G [00:00<?, ?B/s]

Downloading (…)l-00002-of-00002.bin:   0%|          | 0.00/4.16G [00:00<?, ?B/s]

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

Downloading (…)okenizer_config.json:   0%|          | 0.00/222 [00:00<?, ?B/s]

Downloading tokenizer.json:   0%|          | 0.00/14.5M [00:00<?, ?B/s]

Downloading (…)cial_tokens_map.json:   0%|          | 0.00/85.0 [00:00<?, ?B/s]

### Freezing the original weights


In [None]:
# Freeze every weight of the base model; adapters are trained later.
for weight in model.parameters():
    weight.requires_grad = False
    if weight.ndim != 1:
        continue
    # 1-D parameters (e.g. layernorm) are small, so keep them in fp32 for stability.
    weight.data = weight.data.float()

# Gradient checkpointing reduces the number of stored activations.
model.gradient_checkpointing_enable()
model.enable_input_require_grads()

class CastOutputToFloat(nn.Sequential):
    """Sequential container whose forward result is converted to float32."""

    def forward(self, x):
        """Run the wrapped modules on ``x`` and return the result as float32."""
        result = super().forward(x)
        return result.to(torch.float32)
# Wrap the output head so that whatever it returns is cast to float32.
model.lm_head = CastOutputToFloat(model.lm_head)

### Setting up the LoRA adapters

In [None]:
def print_trainable_parameters(model):
    """
    Print the number of trainable parameters in the model.

    Every parameter yielded by ``model.named_parameters()`` is counted via
    ``numel()``; those with ``requires_grad`` set are also counted as trainable.
    The two totals and the trainable share (in percent) are printed on one line.

    Args:
        model: Any object exposing ``named_parameters()`` that yields
            ``(name, parameter)`` pairs.

    Returns:
        None. The summary is written to standard output.
    """
    trainable_params = 0
    all_param = 0
    for _, param in model.named_parameters():
        count = param.numel()
        all_param += count
        if param.requires_grad:
            trainable_params += count
    # A model without any parameters would otherwise raise ZeroDivisionError.
    trainable_pct = 100 * trainable_params / all_param if all_param else 0.0
    print(
        f"trainable params: {trainable_params} || all params: {all_param} || trainable%: {trainable_pct}"
    )

In [None]:
from peft import LoraConfig, get_peft_model

# LoRA hyper-parameters for the adapters that will be trained on top of the frozen model.
config = LoraConfig(
    r=16,  # LoRA rank (size of the low-rank update matrices), not a number of attention heads
    lora_alpha=32,  # LoRA alpha scaling factor
    # target_modules=["q_proj", "v_proj"],  # left unset here; NOTE(review): these names come from other architectures — confirm the module names for BLOOM before enabling
    lora_dropout=0.05,
    bias="none",
    task_type="CAUSAL_LM"  # set this for CLM or Seq2Seq
)

# Wrap the base model with the adapters and report how many parameters are trainable.
model = get_peft_model(model, config)
print_trainable_parameters(model)

trainable params: 7864320 || all params: 7076880384 || trainable%: 0.11112693126452029


## Data

In [None]:
from datasets import DatasetDict, Dataset

# Read the JSON export into a Dataset and expose it as the only ("train") split.
train_split = Dataset.from_json("/content/DataDescriptions - Sheet1 (1) (1).json")
datasetdict = DatasetDict({'train': train_split})

Downloading data files:   0%|          | 0/1 [00:00<?, ?it/s]

In [None]:
# Inspect the loaded split: column names and row count.
datasetdict

DatasetDict({
    train: Dataset({
        features: ['Dataset value 1', 'Dataset Name', 'Dataset columns', 'Dataset Description'],
        num_rows: 50
    })
})

In [None]:
def merge_columns(example):
    """
    Build the training text for one dataset row and store it under "prediction".

    The text is ``"<Dataset Name> <Dataset columns> <Dataset value 1> ->: <Dataset Description>"``.
    Every field is passed through ``str()`` so that a non-string value (for
    example a number parsed from the JSON file) does not raise ``TypeError``
    during concatenation; previously only the description was coerced.

    Args:
        example: Mapping with the keys "Dataset Name", "Dataset columns",
            "Dataset value 1" and "Dataset Description".

    Returns:
        The same ``example`` mapping, with the "prediction" key added.

    Raises:
        KeyError: If one of the four expected keys is missing.
    """
    prompt = " ".join(
        str(example[column])
        for column in ("Dataset Name", "Dataset columns", "Dataset value 1")
    )
    # "->:" separates the prompt from the description; inference splits on it again.
    example["prediction"] = prompt + " ->: " + str(example["Dataset Description"])
    return example

# Add the merged "prediction" column to every training row, then preview the first five.
datasetdict['train'] = datasetdict['train'].map(merge_columns)
datasetdict['train']["prediction"][:5]

Map:   0%|          | 0/50 [00:00<?, ? examples/s]

["Global YouTube Statistics 2023 rank,Youtuber,subscribers,video views,category,Title,uploads,Country,Abbreviation,channel_type,lowest_monthly_earnings,highest_monthly_earnings T-Series, 245000000, 2.28E+11, Music, T-Series, 20082, India, IN, Music, 564600, 9000000 ->: Welcome to the captivating realm of YouTube stardom, where this meticulously curated dataset unveils the statistics of the most subscribed YouTube channels. A collection of YouTube giants, this dataset offers a perfect avenue to analyze and gain valuable insights from the luminaries of the platform. With comprehensive details on top creators' subscriber counts, video views, upload frequency, country of origin, earnings, and more, this treasure trove of information is a must-explore for aspiring content creators, data enthusiasts, and anyone intrigued by the ever-evolving online content landscape. Immerse yourself in the world of YouTube success and unlock a wealth of knowledge with this extraordinary dataset.",
 'Netflix

In [None]:
# Apply merge_columns to every split of the DatasetDict.
# NOTE(review): the 'train' split was already passed through merge_columns in the
# previous cell, so this recomputes the same "prediction" column.
formatted_dataset = datasetdict.map(merge_columns)

Map:   0%|          | 0/50 [00:00<?, ? examples/s]

In [None]:
# Inspect the first training row, including the merged "prediction" text.
formatted_dataset['train'][0]

{'Dataset value 1': 'T-Series, 245000000, 2.28E+11, Music, T-Series, 20082, India, IN, Music, 564600, 9000000',
 'Dataset Name': 'Global YouTube Statistics 2023',
 'Dataset columns': 'rank,Youtuber,subscribers,video views,category,Title,uploads,Country,Abbreviation,channel_type,lowest_monthly_earnings,highest_monthly_earnings',
 'Dataset Description': "Welcome to the captivating realm of YouTube stardom, where this meticulously curated dataset unveils the statistics of the most subscribed YouTube channels. A collection of YouTube giants, this dataset offers a perfect avenue to analyze and gain valuable insights from the luminaries of the platform. With comprehensive details on top creators' subscriber counts, video views, upload frequency, country of origin, earnings, and more, this treasure trove of information is a must-explore for aspiring content creators, data enthusiasts, and anyone intrigued by the ever-evolving online content landscape. Immerse yourself in the world of YouTube 

In [None]:
def _tokenize_batch(samples):
    """Tokenize the "prediction" strings of one batch of rows."""
    return tokenizer(samples['prediction'])


# Tokenize in batches; the tokenizer output is added as new dataset columns.
formatted_dataset = formatted_dataset.map(_tokenize_batch, batched=True)

Map:   0%|          | 0/50 [00:00<?, ? examples/s]

In [None]:
# Confirm that the tokenizer columns were added to the dataset.
formatted_dataset

DatasetDict({
    train: Dataset({
        features: ['Dataset value 1', 'Dataset Name', 'Dataset columns', 'Dataset Description', 'prediction', 'input_ids', 'attention_mask'],
        num_rows: 50
    })
})

### Training

In [None]:
import transformers

# 4 sequences per device x 4 accumulation steps = 16 sequences per optimizer step.
training_args = transformers.TrainingArguments(
    per_device_train_batch_size=4,
    gradient_accumulation_steps=4,
    # Warmup has to finish inside the run. The previous value (100) exceeded
    # max_steps (50), so training ended mid-warmup and the learning rate never
    # reached `learning_rate`. Use 10% of the run instead.
    warmup_steps=5,
    max_steps=50,
    learning_rate=2e-4,
    fp16=True,
    logging_steps=1,
    output_dir='outputs',
)

trainer = transformers.Trainer(
    model=model,
    train_dataset=formatted_dataset['train'],
    args=training_args,
    # mlm=False selects causal (next-token) language modelling rather than masked LM.
    data_collator=transformers.DataCollatorForLanguageModeling(tokenizer, mlm=False),
)
model.config.use_cache = False  # silence the warnings. Please re-enable for inference!
trainer.train()

## Share adapters on the 🤗 Hub

In [None]:
# Upload the trained adapter to a private repository on the Hugging Face Hub.
# NOTE(review): use_auth_token=True presumably reuses the token stored by
# notebook_login() earlier in the notebook — confirm.
model.push_to_hub("prashantkambi/dataset_description",
                  use_auth_token=True,
                  commit_message="basic training",
                  private=True)

Upload 1 LFS files:   0%|          | 0/1 [00:00<?, ?it/s]

adapter_model.bin:   0%|          | 0.00/31.5M [00:00<?, ?B/s]

CommitInfo(commit_url='https://huggingface.co/samwit/bloom-7b1-lora-tagger/commit/62cfae6c87a7d657b2bd3e6e2abac2d5a7d07caf', commit_message='basic training', commit_description='', oid='62cfae6c87a7d657b2bd3e6e2abac2d5a7d07caf', pr_url=None, pr_revision=None, pr_num=None)

## Load adapters from the Hub

In [None]:
import torch
from peft import PeftModel, PeftConfig
from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig

# Place the transformer body on GPU 0 and keep only the output head on the CPU.
device_map = {
    "transformer.word_embeddings": 0,
    "transformer.word_embeddings_layernorm": 0,
    "lm_head": "cpu",
    "transformer.h": 0,
    "transformer.ln_f": 0,
}

# All quantization options live in one config object. Previously this config was
# created but never passed to from_pretrained, and the same options were given as
# loose keyword arguments instead.
quantization_config = BitsAndBytesConfig(
    load_in_8bit=True,
    llm_int8_enable_fp32_cpu_offload=True,  # needed because lm_head is mapped to the CPU
)

peft_model_id = "prashantkambi/dataset_description"
# The adapter config records which base checkpoint the adapter was trained on.
config = PeftConfig.from_pretrained(peft_model_id)
model = AutoModelForCausalLM.from_pretrained(
    config.base_model_name_or_path,
    return_dict=True,
    device_map=device_map,
    quantization_config=quantization_config,
)
tokenizer = AutoTokenizer.from_pretrained(config.base_model_name_or_path)

# Load the Lora model
model = PeftModel.from_pretrained(model, peft_model_id)

## Inference

In [None]:
# Generate a description for every training example and collect prompts and outputs.
inputs = []
outputs = []
for example_text in formatted_dataset['train']['prediction']:
    # Keep only the text before the "->:" separator, i.e. drop the reference description.
    # (The loop variable no longer shadows the builtin `input`.)
    prompt_prefix = example_text.split('->:')[0]
    inputs.append(prompt_prefix)
    # Re-append the separator so the model continues with a description.
    prompt = prompt_prefix + '->:'
    print(prompt)
    batch = tokenizer(prompt, return_tensors='pt')
    with torch.cuda.amp.autocast():
        output_tokens = model.generate(**batch, max_new_tokens=50)

    outputs.append(tokenizer.decode(output_tokens[0], skip_special_tokens=True))

In [None]:
# Generate and print a description for the first training example only.
first_example = formatted_dataset['train']['prediction'][0]
prompt = first_example.split('->:')[0] + '->:'
batch = tokenizer(prompt, return_tensors='pt')

with torch.cuda.amp.autocast():
    output_tokens = model.generate(**batch, max_new_tokens=50)

decoded = tokenizer.decode(output_tokens[0], skip_special_tokens=True)
print('\n\n', decoded)