In [1]:
# # from google.colab import drive
# # drive.mount('/content/drive')

# # install dependencies

# # we use the latest version of transformers, peft, and accelerate
# !pip install -q accelerate peft transformers

# # install bitsandbytes for quantization
# !pip install -q bitsandbytes

# # install trl for the SFT library
# !pip install -q trl

# # we need sentencepiece for the llama2 slow tokenizer
# !pip install sentencepiece

# # we need einops, used by falcon-7b, llama-2 etc
# # einops (einsteinops) is used to simplify tensorops by making them readable
# !pip install -q -U einops

# # we need to install datasets for our training dataset
# !pip install -q datasets

# import os
# os.chdir("/media/tbabanaerep/Transcend")

In [2]:
from transformers import AutoModelForCausalLM
from peft import PeftModel
import torch


# Hub id of the base checkpoint the adapter was trained on.
model_name = "NousResearch/Llama-2-7b-chat-hf"

# Directory holding the LoRA adapter produced by the first fine-tuning step
# (replace this if you want to reuse the notebook with another adapter).
model_path = "llama-2-7b-test"

# Load the base weights in half precision; device_map={"": 0} places the
# whole model on device index 0.
base_model = AutoModelForCausalLM.from_pretrained(
    model_name,
    device_map={"": 0},
    torch_dtype=torch.float16,
    return_dict=True,
    low_cpu_mem_usage=True,
)

# Attach the saved adapter, then fold it into the base weights so that
# `model` is a plain causal-LM again (no PEFT wrapper) for the next round.
model = PeftModel.from_pretrained(base_model, model_path).merge_and_unload()

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]



In [3]:
# --- Run configuration -------------------------------------------------------

# How many passes over the training set this run performs.
# NOTE(review): the two names below say "20-epochs" while this is 15.
# Presumably 20 is the cumulative total including the earlier fine-tuning
# stage -- confirm, or rename to avoid confusion.
num_train_epochs = 15

# Instruction dataset used for supervised fine-tuning.
dataset_name = "isma77777/data-version2"

# Where the trainer writes checkpoints and logs.
output_dir = "./results-20-epochs"

# Name under which the fine-tuned model is saved once training finishes.
new_model = "llama-2-7b-test-20-epochs"

In [4]:
import torch
from transformers import (
    AutoTokenizer,
    AutoModelForCausalLM,
    BitsAndBytesConfig,
    pipeline,
    logging,
)

# 4-bit NF4 quantization settings.
# NOTE(review): nothing in this cell applies bnb_config -- the model being
# trained was already loaded unquantized in float16 by the cell that merges
# the adapter, so no 4-bit quantization is actually in effect here. The object
# is kept in case later cells reference it; confirm and remove if unused.
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=torch.float16,
    bnb_4bit_use_double_quant=False,
)

# The generation cache is of no use during training, so switch it off.
model.config.use_cache = False

# Llama config knob; 1 selects the regular (non-sliced) linear-layer path.
model.config.pretraining_tp = 1

# Load the slow (SentencePiece-based) tokenizer that ships with the base
# checkpoint. trust_remote_code is intentionally NOT enabled: this checkpoint
# uses the tokenizer class built into transformers, so there is no reason to
# allow code from the model repository to be executed.
tokenizer = AutoTokenizer.from_pretrained(model_name, use_fast=False)

# Llama-2 has no dedicated padding token; reuse EOS for padding.
# NOTE(review): padding with EOS can mask genuine EOS tokens in the labels,
# depending on the collator -- confirm this is acceptable for the use case.
tokenizer.pad_token = tokenizer.eos_token
# Pad on the right so the prompt tokens keep their original positions.
tokenizer.padding_side = "right"

In [5]:
from datasets import load_dataset

# Fetch the "train" split of the dataset identified by dataset_name.
dataset = load_dataset(path=dataset_name, split="train")

In [6]:
import torch
from transformers import (
    AutoModelForCausalLM,
    AutoTokenizer,
    BitsAndBytesConfig,
    HfArgumentParser,
    TrainingArguments,
    pipeline,
    logging,
)
from peft import LoraConfig, PeftModel
from trl import SFTTrainer

# LoRA adapter configuration; handed to SFTTrainer below, which uses it to
# add a fresh adapter on top of `model`.
peft_config = LoraConfig(
    lora_alpha=16,                          # LoRA scaling factor
    lora_dropout=0.1,                       # dropout applied on the LoRA branch
    r=64,                                   # rank of the low-rank update matrices
    bias="none",                            # bias terms are not trained
    task_type="CAUSAL_LM",
)

# Training hyper-parameters.
training_arguments = TrainingArguments(
    output_dir=output_dir,
    num_train_epochs=num_train_epochs,      # number of passes over the training set
    per_device_train_batch_size=1,          # larger batch sizes ran out of CUDA memory
    gradient_accumulation_steps=2,          # effective batch size per device: 1 * 2 = 2
    optim="paged_adamw_32bit",              # paged AdamW; explicitly chosen (this is not the TrainingArguments default)
    save_steps=1000,                        # write a checkpoint every 1000 steps (useful for plotting learning curves afterwards)
    logging_steps=2,                       # log every 2 steps; NOTE(review): very chatty over a ~29k-step run -- confirm this is wanted
    learning_rate=2e-4,                     # peak learning rate reached after warmup
    weight_decay=0.001,                     # weight decay coefficient
    fp16=False,                             # fp16 mixed precision disabled
    bf16=False,                             # bf16 mixed precision disabled (bf16 needs hardware support, e.g. A100)
    max_grad_norm=0.3,                      # gradient clipping threshold
    max_steps=-1,                           # needs to be -1, otherwise it overrides num_train_epochs
    warmup_ratio=0.03,                      # fraction of total steps spent on learning-rate warmup
    group_by_length=True,                   # group samples of similar length; NOTE(review): with batch size 1 the benefit is questionable -- confirm
    lr_scheduler_type="cosine",           # cosine decay is what is used; NOTE(review): an earlier comment here said constant worked better -- confirm which is intended
    report_to="tensorboard"
)

# Supervised fine-tuning trainer.
trainer = SFTTrainer(
    model=model,
    train_dataset=dataset,
    peft_config=peft_config,                # use our LoRA config
    dataset_text_field="text",              # dataset column holding the training text
    max_seq_length=None,                    # no explicit limit passed; NOTE(review): TRL may apply its own default cap when this is None, so long samples can still be truncated -- confirm for the installed version
    tokenizer=tokenizer,                    # use the llama tokenizer
    args=training_arguments,                # use the training arguments
    packing=False,                          # do not pack several samples into one sequence
)

# Train model
trainer.train()

# Save the trained model under `new_model`.
# NOTE(review): presumably this stores only the LoRA adapter, since the
# trainer wraps the model with PEFT -- confirm before relying on it as a
# standalone checkpoint.
trainer.model.save_pretrained(new_model)



  0%|          | 0/29085 [00:00<?, ?it/s]

{'loss': 0.5017, 'grad_norm': 0.06683349609375, 'learning_rate': 4.581901489117984e-07, 'epoch': 0.0}
{'loss': 0.3248, 'grad_norm': 0.05072021484375, 'learning_rate': 9.163802978235968e-07, 'epoch': 0.0}
{'loss': 0.5331, 'grad_norm': 0.0687255859375, 'learning_rate': 1.3745704467353952e-06, 'epoch': 0.0}
{'loss': 0.4663, 'grad_norm': 0.04107666015625, 'learning_rate': 1.8327605956471937e-06, 'epoch': 0.0}
{'loss': 0.315, 'grad_norm': 0.034027099609375, 'learning_rate': 2.290950744558992e-06, 'epoch': 0.01}
{'loss': 0.2954, 'grad_norm': 0.054168701171875, 'learning_rate': 2.7491408934707903e-06, 'epoch': 0.01}
{'loss': 0.5754, 'grad_norm': 0.055908203125, 'learning_rate': 3.2073310423825886e-06, 'epoch': 0.01}
{'loss': 0.426, 'grad_norm': 0.07666015625, 'learning_rate': 3.6655211912943874e-06, 'epoch': 0.01}
{'loss': 0.3547, 'grad_norm': 0.07037353515625, 'learning_rate': 4.123711340206186e-06, 'epoch': 0.01}
{'loss': 0.2621, 'grad_norm': 0.07562255859375, 'learning_rate': 4.58190148911