In [None]:
!pip install bitsandbytes trl datasets transformers peft

In [None]:
from transformers import AutoModelForCausalLM, BitsAndBytesConfig
from transformers import AutoTokenizer

import torch
from transformers import TrainingArguments
from peft import LoraConfig, PeftModel,get_peft_model

from trl import SFTTrainer,SFTConfig
from datasets import load_dataset


In [None]:
def print_dataset_details(dataset):
    """Print the number of examples in the test, train and validation splits.

    Args:
        dataset: Mapping with ``'test'``, ``'train'`` and ``'validation'``
            keys whose values support ``len()``.

    Raises:
        KeyError: If one of the three splits is missing (splits listed before
            the missing one are still printed).
    """
    # (label, split key) pairs, in the order they are reported.
    report_order = (
        ("Test : ", 'test'),
        ("Train : ", 'train'),
        ("Validation : ", 'validation'),
    )
    for label, split_key in report_order:
        print(label, len(dataset[split_key]))

In [None]:
# Maximum number of examples kept from each split.
SUBSET_SIZES = {"train": 1500, "test": 500, "validation": 500}

dataset = load_dataset("siddrao11/cs182-storytelling-dataset")
for split_name, max_examples in SUBSET_SIZES.items():
    # Cap at the split's real length so select() never receives an index
    # past the end when a split is smaller than the requested subset.
    keep = min(max_examples, len(dataset[split_name]))
    dataset[split_name] = dataset[split_name].select(range(keep))
print_dataset_details(dataset)


In [None]:
# Identifier of the base checkpoint; load_model() and get_tokenizer() both pass
# it to their respective from_pretrained() calls.
model_name = "NousResearch/Llama-2-7b-chat-hf"


Model Loading

In [None]:
def load_model():
    """Load the checkpoint named by ``model_name`` as a 4-bit quantized causal LM.

    The bitsandbytes configuration requests NF4 quantization with double
    quantization and bfloat16 as the compute dtype.

    Returns:
        The model object produced by ``AutoModelForCausalLM.from_pretrained``.
    """
    quantization_settings = {
        "load_in_4bit": True,                      # store weights in 4 bits
        "bnb_4bit_quant_type": "nf4",
        "bnb_4bit_use_double_quant": True,
        "bnb_4bit_compute_dtype": torch.bfloat16,  # dtype used for computation
    }
    bnb_config = BitsAndBytesConfig(**quantization_settings)

    quantized_model = AutoModelForCausalLM.from_pretrained(
        model_name,
        quantization_config=bnb_config,
    )
    print("Model loaded successfully!")
    return quantized_model

In [None]:
def get_tokenizer():
    """Create the tokenizer for ``model_name`` and configure its padding.

    Returns:
        The tokenizer from ``AutoTokenizer.from_pretrained``, with the EOS
        token reused as the pad token and padding applied on the right.
    """
    llama_tokenizer = AutoTokenizer.from_pretrained(
        model_name,
        trust_remote_code=True,
    )
    # Padding setup: pad on the right, using the EOS token as the pad token.
    llama_tokenizer.padding_side = "right"
    llama_tokenizer.pad_token = llama_tokenizer.eos_token
    return llama_tokenizer

In [None]:
def get_trainer(model,tokenizer):
    """Build an ``SFTTrainer`` that fine-tunes ``model`` with a LoRA adapter.

    Training and evaluation data come from the module-level ``dataset``
    (its ``"train"`` and ``"validation"`` splits); the text to train on is
    read from the ``"formatted_text"`` field.

    Args:
        model: Base causal language model, not yet wrapped in a PEFT adapter.
            The trainer attaches the LoRA adapter itself.
        tokenizer: Tokenizer that belongs to ``model``.

    Returns:
        A configured ``SFTTrainer``; call ``.train()`` on it to start training.
    """
    # Local import: only needed for the keyword-compatibility check below.
    import inspect

    training_arguments = SFTConfig(
        dataset_text_field="formatted_text",  # Specify the field in your dataset
        output_dir="./results",  # Directory to save the model
        overwrite_output_dir=True,
        per_device_train_batch_size=1,
        gradient_accumulation_steps=4,
        num_train_epochs=1,
        max_steps=-1,  # no step cap; num_train_epochs decides the run length
        logging_steps=25,
        save_steps=1000,
        group_by_length=True,
        lr_scheduler_type="cosine",
        report_to="tensorboard",
        optim="paged_adamw_32bit",
        fp16=False,
        bf16=False,
    )

    peft_config = LoraConfig(
        lora_alpha=16,
        lora_dropout=0.1,
        r=64,
        bias="none",
        task_type="CAUSAL_LM",
    )

    # Newer TRL releases take the tokenizer as `processing_class`, older ones
    # as `tokenizer`. Use whichever keyword the installed SFTTrainer accepts.
    trainer_params = inspect.signature(SFTTrainer.__init__).parameters
    if "processing_class" in trainer_params:
        tokenizer_kwarg = "processing_class"
    else:
        tokenizer_kwarg = "tokenizer"

    # Hand the base model and the LoRA config to the trainer and let it attach
    # the adapter. Wrapping the model with get_peft_model() here as well would
    # supply the adapter configuration twice.
    trainer = SFTTrainer(
        model=model,
        train_dataset=dataset["train"],
        eval_dataset=dataset["validation"],
        peft_config=peft_config,
        args=training_arguments,
        **{tokenizer_kwarg: tokenizer},
    )

    return trainer

In [None]:
# Driver cell: bitsandbytes quantization needs a CUDA device, so check for one
# first and only then load the model, build the trainer and start training.
cuda_flag = torch.cuda.is_available()
if not cuda_flag:
    print("CUDA is required but not available for bitsandbytes.(Hint : GPU required.)")
else:
    model = load_model()
    tokenizer = get_tokenizer()
    trainer = get_trainer(model, tokenizer)
    trainer.train()