In [1]:
import logging
import os

import pandas
import torch
import transformers
from datasets import load_dataset
from huggingface_hub import notebook_login
from peft import prepare_model_for_kbit_training, LoraConfig, get_peft_model
from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# Base checkpoint to fine-tune and the dataset used for fine-tuning.
model_name = "meta-llama/Llama-2-7b-chat-hf"
dataset = load_dataset("lamini/lamini_docs")
use_hf = True  # NOTE(review): not referenced by any cell visible here
# Read the Hugging Face access token from the environment rather than hardcoding a
# credential in the notebook. If HF_TOKEN is unset this is None, in which case the
# Hugging Face libraries are expected to fall back to the locally cached login.
token = os.environ.get("HF_TOKEN")

In [3]:
# Quantization settings for loading the base model: 8-bit weights.
# Double quantization, the "nf4" quant type and the compute dtype are options of the
# 4-bit path only (`bnb_4bit_*`, used together with `load_in_4bit=True`); there are no
# `bnb_8bit_*` equivalents, so passing them has no effect and none are passed here.
bnb_config = BitsAndBytesConfig(
    load_in_8bit=True,
)

In [4]:
# Fetch the tokenizer that belongs to the chosen checkpoint (authenticated download).
tokenizer = AutoTokenizer.from_pretrained(
    model_name,
    token=token,
)
# Reuse the end-of-sequence token as the padding token.
tokenizer.pad_token = tokenizer.eos_token

In [5]:
def tokenize_function(examples, max_length=2048):
    """Tokenize the first example of a batch with the module-level ``tokenizer``.

    Only element ``[0]`` of each column is read, so this is meant to be used with
    ``map(..., batched=True, batch_size=1)``; larger batches are not supported.

    The text is tokenized once with left-side truncation at ``max_length``. This is
    equivalent to first measuring the untruncated length and then truncating to
    ``min(length, max_length)``, but avoids tokenizing every text twice.

    Args:
        examples: Mapping of column name to a list of values. The text is built from
            "question" + "answer" if both are present, else "input" + "output" if
            both are present, else "text".
        max_length: Maximum number of tokens to keep; when the text is longer,
            tokens are dropped from the left.

    Returns:
        The tokenizer output for the single text, requested with
        ``return_tensors="np"``.

    Side effects:
        Sets ``tokenizer.pad_token`` to ``tokenizer.eos_token`` and
        ``tokenizer.truncation_side`` to ``"left"`` on the shared tokenizer.
    """
    if "question" in examples and "answer" in examples:
        text = examples["question"][0] + examples["answer"][0]
    elif "input" in examples and "output" in examples:
        text = examples["input"][0] + examples["output"][0]
    else:
        text = examples["text"][0]

    # Kept for callers that rely on the tokenizer being configured by this function.
    tokenizer.pad_token = tokenizer.eos_token
    # Truncate from the left so the end of the text survives when it is too long.
    tokenizer.truncation_side = "left"

    return tokenizer(
        text,
        return_tensors="np",
        truncation=True,
        max_length=max_length,
    )

In [6]:
# Alias for the loaded dataset that is about to be tokenized.
finetuning_dataset_loaded = dataset

# Tokenize one example per call: tokenize_function only reads element [0] of a batch,
# hence batch_size=1.
tokenized_dataset = finetuning_dataset_loaded.map(
    tokenize_function,
    batch_size=1,
    batched=True,
    drop_last_batch=True,
)

# Show the resulting splits and their columns.
print(tokenized_dataset)

DatasetDict({
    train: Dataset({
        features: ['question', 'answer', 'input_ids', 'attention_mask', 'labels'],
        num_rows: 1260
    })
    test: Dataset({
        features: ['question', 'answer', 'input_ids', 'attention_mask', 'labels'],
        num_rows: 140
    })
})


In [7]:
# Pull the two splits out of the tokenized dataset dictionary.
train_data, test_data = tokenized_dataset["train"], tokenized_dataset["test"]

In [8]:
# Print a summary of each split, one after the other.
print(train_data, test_data, sep="\n")

Dataset({
    features: ['question', 'answer', 'input_ids', 'attention_mask', 'labels'],
    num_rows: 1260
})
Dataset({
    features: ['question', 'answer', 'input_ids', 'attention_mask', 'labels'],
    num_rows: 140
})


In [9]:
# Load the causal-LM checkpoint with the quantization settings defined earlier;
# the device map assigns the whole model (key "") to device index 0.
base_model = AutoModelForCausalLM.from_pretrained(
    model_name,
    token=token,
    device_map={"": 0},
    quantization_config=bnb_config,
)


Welcome to bitsandbytes. For bug reports, please submit your error trace to: https://github.com/TimDettmers/bitsandbytes/issues
binary_path: C:\Users\adity\miniconda3\envs\tfgpu\lib\site-packages\bitsandbytes\cuda_setup\libbitsandbytes_cuda116.dll
CUDA SETUP: Loading binary C:\Users\adity\miniconda3\envs\tfgpu\lib\site-packages\bitsandbytes\cuda_setup\libbitsandbytes_cuda116.dll...


Loading checkpoint shards: 100%|█████████████████████████████████████████████████████████| 2/2 [00:37<00:00, 18.96s/it]


In [10]:
# Turn on gradient checkpointing on the loaded model before preparing it for training.
base_model.gradient_checkpointing_enable()
# Run PEFT's k-bit training preparation on the quantized model. The name is rebound,
# so re-running this cell applies the preparation to an already-prepared model.
# NOTE(review): the preparation helper may enable gradient checkpointing itself,
# which would make the explicit call above redundant — confirm against the PEFT docs.
base_model = prepare_model_for_kbit_training(base_model)

In [11]:
def print_trainable_parameters(model):
    """
    Report how many of the model's parameters require gradients.

    Walks ``model.named_parameters()`` once and prints the trainable element count,
    the total element count, and the trainable share as a percentage.
    """
    # One pass over the parameters: remember each size and whether it is trainable.
    sizes = [(param.numel(), param.requires_grad) for _, param in model.named_parameters()]
    all_param = sum(count for count, _ in sizes)
    trainable_params = sum(count for count, needs_grad in sizes if needs_grad)
    print(
        f"trainable params: {trainable_params} || all params: {all_param} || trainable%: {100 * trainable_params / all_param}"
    )

In [12]:
# LoRA adapter settings: rank-8 adapters on the q_proj and v_proj modules,
# for a causal language-modelling task.
config = LoraConfig(
    task_type="CAUSAL_LM",
    target_modules=["q_proj", "v_proj"],
    r=8,
    lora_alpha=32,
    lora_dropout=0.05,
    bias="none",
)

# Wrap the prepared base model with the adapters (the name is rebound to the
# wrapped model) and report how much of it is trainable.
base_model = get_peft_model(base_model, config)
print_trainable_parameters(base_model)

trainable params: 4194304 || all params: 6742609920 || trainable%: 0.06220594176090199


In [13]:
# `transformers` is imported in the notebook's top import cell.

# Hyperparameters for a short fine-tuning run: 200 optimizer steps, each accumulating
# gradients over 4 single-example batches; loss is logged every 10 steps.
training_args = transformers.TrainingArguments(
    output_dir="outputs",
    per_device_train_batch_size=1,
    gradient_accumulation_steps=4,
    warmup_steps=2,
    max_steps=200,
    learning_rate=1.5e-4,
    fp16=True,
    logging_steps=10,
    optim="adafactor",
)

trainer = transformers.Trainer(
    model=base_model,
    args=training_args,
    train_dataset=train_data,
    # mlm=False selects the collator's non-masked (causal) language-modelling mode.
    data_collator=transformers.DataCollatorForLanguageModeling(tokenizer, mlm=False),
)
base_model.config.use_cache = False  # silence the warnings. Please re-enable for inference!
trainer.train()

dataloader_config = DataLoaderConfiguration(dispatch_batches=None, split_batches=False, even_batches=True, use_seedable_sampler=True)


Step,Training Loss
10,2.7841
20,2.0802
30,1.6255
40,1.6779
50,1.5106
60,1.497
70,1.5604
80,1.4081
90,1.4792
100,1.2852


TrainOutput(global_step=200, training_loss=1.5406416416168214, metrics={'train_runtime': 1085.3013, 'train_samples_per_second': 0.737, 'train_steps_per_second': 0.184, 'total_flos': 2674856342446080.0, 'train_loss': 1.5406416416168214, 'epoch': 0.63})

In [15]:
# `notebook_login` is imported in the notebook's top import cell.
# Opens the interactive Hugging Face login widget for this notebook session.
notebook_login()

VBox(children=(HTML(value='<center> <img\nsrc=https://huggingface.co/front/assets/huggingface_logo-noborder.sv…

In [18]:
# Upload the trained adapter to a private Hub repository.
base_model.push_to_hub(
    "AdityaSingh312/Llama-7b-lamini-docs",
    # `token=True` uses the locally stored login token; it replaces the deprecated
    # `use_auth_token=True` and matches the `token=` argument used when loading.
    token=True,
    commit_message="basic training",
    private=True,
)

adapter_model.safetensors: 100%|██████████████████████████████████████████████████| 16.8M/16.8M [00:05<00:00, 2.91MB/s]


CommitInfo(commit_url='https://huggingface.co/AdityaSingh312/Llama-7b-lamini-docs/commit/abc0a04a83abfbaa45561b816e45ec9ec8902bef', commit_message='basic training', commit_description='', oid='abc0a04a83abfbaa45561b816e45ec9ec8902bef', pr_url=None, pr_revision=None, pr_num=None)