# LLaMa 7B LoRa

- Install all the required libraries

In [1]:
!pip install -q datasets loralib sentencepiece

!pip uninstall transformers
!pip install -q git+https://github.com/zphang/transformers@c3dc391

!pip install -q git+https://github.com/huggingface/peft.git

[0m

In [2]:
!pip install bitsandbytes
!pip install scipy

[0m

## Fine Tuning

- Load the model and fine tune it using the extracted data

- Note : Make sure you  at least have 30 GB as the GPU RAM to avoid memory issues.

In [8]:
import os

# os.environ["CUDA_VISIBLE_DEVICES"] = "0"
import torch
import torch.nn as nn
import bitsandbytes as bnb
from datasets import load_dataset
import transformers
from transformers import AutoTokenizer, AutoConfig, LLaMAForCausalLM, LLaMATokenizer
from peft import prepare_model_for_int8_training, LoraConfig, get_peft_model

In [9]:
# Setting for A100 - For 3090
MICRO_BATCH_SIZE = 8  # change to 4 for 3090
BATCH_SIZE = 128
GRADIENT_ACCUMULATION_STEPS = BATCH_SIZE // MICRO_BATCH_SIZE
EPOCHS = 2  # paper uses 3
LEARNING_RATE = 2e-5  # from the original paper
CUTOFF_LEN = 256  # 256 accounts for about 96% of the data
LORA_R = 4
LORA_ALPHA = 16
LORA_DROPOUT = 0.05

In [None]:
base_pretrained_model_name = "linhvu/decapoda-research-llama-7b-hf"

In [10]:
pretrained_model = LLaMAForCausalLM.from_pretrained(
    base_pretrained_model_name,
    load_in_8bit=True,
    device_map="auto"
)
tokenizer = LLaMATokenizer.from_pretrained(
    base_pretrained_model_name, add_eos_token=True
)

model = prepare_model_for_int8_training(pretrained_model)



Loading checkpoint shards:   0%|          | 0/33 [00:00<?, ?it/s]

The tokenizer class you load from this checkpoint is not the same type as the class this function is called from. It may result in unexpected tokenization. 
The tokenizer class you load from this checkpoint is 'LlamaTokenizer'. 
The class this function is called from is 'LLaMATokenizer'.


In [11]:
config = LoraConfig(
    r=LORA_R,
    lora_alpha=LORA_ALPHA,
    target_modules=["q_proj", "v_proj"],
    lora_dropout=LORA_DROPOUT,
    bias="none",
    task_type="CAUSAL_LM",
)
model = get_peft_model(model, config)
tokenizer.pad_token_id = 0  # unk. we want this to be different from the eos token
data = load_dataset("json", data_files="training_data/theplantera.json")

Downloading data files:   0%|          | 0/1 [00:00<?, ?it/s]

Extracting data files:   0%|          | 0/1 [00:00<?, ?it/s]

Generating train split: 0 examples [00:00, ? examples/s]

In [12]:
def generate_prompt(data_point):
    # sorry about the formatting disaster gotta move fast
    if data_point["input"]:
        return f"""Below is an instruction that describes a task, paired with an input that provides further context. Write a response that appropriately completes the request.
### Instruction:
{data_point["instruction"]}
### Input:
{data_point["input"]}
### Response:
{data_point["output"]}"""
    else:
        return f"""Below is an instruction that describes a task. Write a response that appropriately completes the request.
### Instruction:
{data_point["instruction"]}
### Response:
{data_point["output"]}"""


data = data.shuffle().map(
    lambda data_point: tokenizer(
        generate_prompt(data_point),
        truncation=True,
        max_length=CUTOFF_LEN,
        padding="max_length",
    )
)

trainer = transformers.Trainer(
    model=model,
    train_dataset=data["train"],
    args=transformers.TrainingArguments(
        per_device_train_batch_size=MICRO_BATCH_SIZE,
        gradient_accumulation_steps=GRADIENT_ACCUMULATION_STEPS,
        warmup_steps=100,
        num_train_epochs=EPOCHS,
        learning_rate=LEARNING_RATE,
        fp16=True,
        logging_steps=1,
        output_dir="lora-alpaca",
        save_total_limit=3,
    ),
    data_collator=transformers.DataCollatorForLanguageModeling(tokenizer, mlm=False),
)
model.config.use_cache = False
trainer.train(resume_from_checkpoint=False)
#save_model
model.save_pretrained("lora-theplantera")


Map:   0%|          | 0/215 [00:00<?, ? examples/s]



Step,Training Loss
1,2.134
2,3.5841


TrainOutput(global_step=2, training_loss=2.8590428829193115, metrics={'train_runtime': 99.473, 'train_samples_per_second': 4.323, 'train_steps_per_second': 0.02, 'total_flos': 3482170649739264.0, 'train_loss': 2.8590428829193115, 'epoch': 1.59})

## Upload the fine tuned model on your huggingface profile

- Log into your huggingface account and generate an authentication key

In [14]:
from huggingface_hub import notebook_login
notebook_login()

VBox(children=(HTML(value='<center> <img\nsrc=https://huggingface.co/front/assets/huggingface_logo-noborder.sv…

In [15]:
model.push_to_hub("vik1996/llama2_theplantera-chatbot", use_auth_token=True)

adapter_model.safetensors:   0%|          | 0.00/8.41M [00:00<?, ?B/s]

CommitInfo(commit_url='https://huggingface.co/vik1996/llama2_theplantera-chatbot/commit/36571f45edd49c36c950482487237818fcc6dae7', commit_message='Upload model', commit_description='', oid='36571f45edd49c36c950482487237818fcc6dae7', pr_url=None, pr_revision=None, pr_num=None)