In [3]:
import pandas as pd
import numpy as np
import torch
import json
import wandb
import transformers
import bitsandbytes as bnb
from datasets import Dataset
from transformers import AdamW, AutoTokenizer, PreTrainedTokenizerFast, AutoModelForCausalLM, BitsAndBytesConfig, TrainingArguments, Trainer
from peft import LoraConfig, get_peft_model, prepare_model_for_kbit_training
from tqdm import tqdm
import os


  from .autonotebook import tqdm as notebook_tqdm


In [19]:
os.environ["WANDB_DISABLED"] = "true"
os.environ['CUDA_LAUNCH_BLOCKING'] = "1"
os.environ['TORCH_USE_CUDA_DSA'] = "1"

In [4]:
model_id = "meta-llama/Meta-Llama-3-8B-Instruct"
tokenizer = AutoTokenizer.from_pretrained(model_id, eos_token='</s>', pad_token='</s>')


Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


In [5]:
def apply_template(row):
    return json.dumps([
        {"role": "system", "content": "You are a helpful AI assistant."},
        {"role": "user", "content": row['Question']},
        {"role": "assistant", "content": row['Answer']}
    ])

def preprocess_function(examples):
    return tokenizer(examples["text"], truncation=True, padding="max_length", max_length=128)

In [6]:
file_path = "./data/formatted_dataset.xlsx"
df = pd.read_excel(file_path)
df['text'] = df.apply(apply_template, axis=1)
dataset = Dataset.from_pandas(df)
tokenized_dataset = dataset.map(preprocess_function, batched=True, remove_columns=dataset.column_names)

Map: 100%|██████████| 87599/87599 [00:03<00:00, 23542.17 examples/s]


In [7]:
dataset["Question"]

['To whom did the Virgin Mary allegedly appear in 1858 in Lourdes France?',
 'What is in front of the Notre Dame Main Building?',
 'The Basilica of the Sacred heart at Notre Dame is beside to which structure?',
 'What is the Grotto at Notre Dame?',
 'What sits on top of the Main Building at Notre Dame?',
 'When did the Scholastic Magazine of Notre dame begin publishing?',
 "How often is Notre Dame's the Juggler published?",
 'What is the daily student paper at Notre Dame called?',
 'How many student news papers are found at Notre Dame?',
 'In what year did the student paper Common Sense begin publication at Notre Dame?',
 'Where is the headquarters of the Congregation of the Holy Cross?',
 'What is the primary seminary of the Congregation of the Holy Cross?',
 'What is the oldest structure at Notre Dame?',
 'What individuals live at Fatima House at Notre Dame?',
 'Which prize did Frederick Buechner create?',
 'How many BS level degrees are offered in the College of Engineering at Notre

In [8]:
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_use_double_quant=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=torch.float16
)

In [9]:
model = AutoModelForCausalLM.from_pretrained(
    model_id,
    quantization_config=bnb_config,
    device_map="auto"
)

Loading checkpoint shards: 100%|██████████| 4/4 [00:05<00:00,  1.37s/it]


In [10]:
model.gradient_checkpointing_enable()
model = prepare_model_for_kbit_training(model)

In [11]:
def print_trainable_parameters(model):
    """
    Prints the number of trainable parameters in the model.
    """
    trainable_params = 0
    all_param = 0
    for _, param in model.named_parameters():
        all_param += param.numel()
        if param.requires_grad:
            trainable_params += param.numel()
    print(
        f"trainable params: {trainable_params} || all params: {all_param} || trainable%: {100 * trainable_params / all_param}"
    )


In [12]:
lora_config = LoraConfig(
    r=4,
    lora_alpha=16,
    target_modules=["q_proj", "up_proj", "o_proj", "k_proj", "down_proj", "gate_proj", "v_proj"],
    lora_dropout=0.1,
    bias="none",
    task_type="CAUSAL_LM"
)

In [13]:
model = get_peft_model(model, lora_config)
print_trainable_parameters(model)

trainable params: 10485760 || all params: 4551086080 || trainable%: 0.23040126720696963


In [17]:
training_args = TrainingArguments(
    per_device_train_batch_size=2,
    gradient_accumulation_steps=1,
    learning_rate=1e-4,
    fp16=True,
    logging_steps=10,
    output_dir="outputs",
    optim="paged_adamw_8bit",
    # report_to="none"
)

trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=tokenized_dataset,
    data_collator=transformers.DataCollatorForLanguageModeling(tokenizer, mlm=False),
)

huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
Using the `WANDB_DISABLED` environment variable is deprecated and will be removed in v5. Use the --report_to flag to control the integrations used for logging result (for instance --report_to none).


In [21]:
model.config.use_cache = False
trainer.train()

RuntimeError: CUDA error: device-side assert triggered
CUDA kernel errors might be asynchronously reported at some other API call, so the stacktrace below might be incorrect.
For debugging consider passing CUDA_LAUNCH_BLOCKING=1.
Compile with `TORCH_USE_CUDA_DSA` to enable device-side assertions.


In [None]:
model.save_pretrained("./llama-3-8b-finetuned")
tokenizer.save_pretrained("./llama-3-8b-finetuned")