<a href="https://colab.research.google.com/github/ccseibell/LoRA_complexity/blob/main/training/Sentiment_5%262class.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
## To switch between the 2-class and 5-class setups, update the model parameters (num_labels) and map_labels(); see the "update when changing classes" comments below.

!pip install datasets
!pip install evaluate
!pip install peft

# Importing libraries
from tqdm import tqdm  # For progress bars
import torch
from transformers import (
    AutoModelForSequenceClassification,  # For loading pre-trained model
    AutoTokenizer,  # For tokenizing text
    BitsAndBytesConfig,  # For model quantization
    TrainingArguments,  # For configuring training
    Trainer,  # Trainer class for handling training/evaluation loops
)
from datasets import load_dataset  # For loading datasets
from peft import LoraConfig, TaskType, get_peft_model, prepare_model_for_kbit_training  # LoRA setup
import evaluate  # For evaluation metrics
import numpy as np  # For array manipulation
import warnings  # To ignore warnings

# Set warnings to ignore for cleaner output.
# NOTE(review): no category or module filter is given, so this silences every
# warning for the rest of the session, including deprecation notices from the
# libraries imported above — confirm that is intended.
warnings.filterwarnings('ignore')

# Hyperparameters
k_bit_quantization = 8  # Quantization bit size
LoRA_r = 500             # LoRA rank
layers = ["query", "key", "value", "output.dense"]  # Typical attention layers in BERT

# In BERT's attention block the query/key/value projections live under
# `attention.self`, while the output projection lives under `attention.output.dense`.
# Prefixing every entry with `attention.self.` produced the non-existent path
# `attention.self.output.dense`, so that projection was never matched by LoRA.
_self_attention_layers = ("query", "key", "value")
target_modules = [
    f"encoder.layer.{i}.attention.self.{layer}"
    if layer in _self_attention_layers
    else f"encoder.layer.{i}.attention.{layer}"
    for layer in layers
    for i in range(12)  # 12 layers in base BERT
]
per_device_batch_size = 8  # Training batch size
gradient_accumulation_steps = 2  # Steps to accumulate gradients before an update
max_length = 512       # Max token length for each input

# Get quantization configuration
def get_quantization_config(k_bit_quantization):
    quantization_config_params = {
        4: {
            "load_in_4bit": True,
            "bnb_4bit_quant_type": "nf4",
            "bnb_4bit_use_double_quant": True,
            "bnb_4bit_compute_dtype": torch.bfloat16,
        },
        8: {
            "load_in_8bit": True,
        },
        16: None
    }
    return BitsAndBytesConfig(**quantization_config_params[k_bit_quantization]) if k_bit_quantization else None

# Build the quantization config for the bit width chosen in the hyperparameters
quantization_config = get_quantization_config(k_bit_quantization)

# Set up LoRA configuration: collect the settings first, then build the config
lora_kwargs = {
    "task_type": TaskType.SEQ_CLS,     # Text classification task
    "target_modules": target_modules,  # Modules LoRA is attached to
    "r": LoRA_r,                       # Adapter rank
    "lora_alpha": 8,                   # Scaling factor for LoRA
    "use_rslora": True,                # Rank-stabilized scaling (see PEFT LoraConfig docs)
    "lora_dropout": 0.1,               # Dropout rate to prevent overfitting
}
lora_config = LoraConfig(**lora_kwargs)

# Checkpoint used for both the tokenizer and the classifier (a BERT-based text model)
model_name = "bert-base-uncased"

# Initialize the matching tokenizer
tokenizer = AutoTokenizer.from_pretrained(model_name)

# Load the Amazon Electronics Reviews dataset
ds = load_dataset("gyoungjr/amazon-electronics-reviews")

# Define a preprocessing function to tokenize the text
def preprocess_text(batch):
    """Tokenize ``batch['text']``, padding and truncating to ``max_length`` tokens."""
    tokenizer_options = {
        "padding": "max_length",
        "truncation": True,
        "max_length": max_length,
    }
    return tokenizer(batch['text'], **tokenizer_options)

# Collapse 5-class labels into binary labels; update when changing classes
def map_labels(batch):
    """Rewrite ``batch['labels']`` in place and return the same dict.

    Labels 1 and 2 become 0 (negative), 4 and 5 become 1 (positive), and
    anything else becomes -1 (neutral or unexpected).
    """
    original_label = batch['labels']
    binary_label = -1  # Neutral or unexpected labels
    for label_group, mapped_value in (((1, 2), 0), ((4, 5), 1)):
        if original_label in label_group:
            binary_label = mapped_value
            break
    batch['labels'] = binary_label
    return batch

# Dataset pipeline (update the first two stages when changing classes):
#   1. remap the labels with map_labels,
#   2. drop the rows that map_labels flagged with -1,
#   3. tokenize the text in batches.
ds = (
    ds.map(map_labels)
    .filter(lambda row: row['labels'] != -1)
    .map(preprocess_text, batched=True)
)

# The dataset's 'test' split is used as the validation set
train_dataset, val_dataset = ds['train'], ds['test']

# Print the set of label values present in each split
# (with the binary mapping above this is {0, 1})
for split_name, split in (("Train", train_dataset), ("Validation", val_dataset)):
    print(f"{split_name} label distribution:", set(split['labels']))

# Inspect one example: it should include 'input_ids', 'attention_mask', and 'labels'
print(train_dataset[0])

# Accuracy metric, loaded once and reused for every evaluation pass
accuracy = evaluate.load("accuracy")


def compute_metrics(eval_pred):
    """Return the accuracy of the arg-max class predictions.

    Args:
        eval_pred: A ``(logits, labels)`` pair; the predicted class is the
            arg-max of ``logits`` along the last axis.

    Returns:
        Whatever ``accuracy.compute`` returns for those predictions and labels.
    """
    class_scores, references = eval_pred
    return accuracy.compute(
        predictions=np.argmax(class_scores, axis=-1),
        references=references,
    )

# Load the pre-trained checkpoint with a sequence-classification head.
# CHANGE num_labels WHEN GOING BETWEEN CLASSES!! It must match the number of
# label values left in the dataset after map_labels/filtering.
# NOTE(review): `quantization_config` is built earlier in this file but is not
# passed to from_pretrained() here, so the checkpoint appears to be loaded
# unquantized regardless of k_bit_quantization — confirm whether
# `quantization_config=quantization_config` was intended.
model = AutoModelForSequenceClassification.from_pretrained(model_name, num_labels=2)

# Runs for any truthy k_bit_quantization value
if k_bit_quantization:
    model = prepare_model_for_kbit_training(model)
    model.config.use_cache = False  # Compatibility setting for quantization

# Apply LoRA to the model
model = get_peft_model(model, lora_config)

# Set up training arguments
training_args = TrainingArguments(
    # Where checkpoints and logs are written
    output_dir=f"results/{model_name}",
    # Training length and batch sizes
    max_steps=400,
    gradient_accumulation_steps=gradient_accumulation_steps,
    per_device_train_batch_size=per_device_batch_size,
    per_device_eval_batch_size=per_device_batch_size,
    # Step-based cadence: log every 10 steps, evaluate and save every 50.
    # NOTE(review): newer transformers releases rename `evaluation_strategy`
    # to `eval_strategy` — confirm against the installed version.
    evaluation_strategy="steps",
    eval_steps=50,
    save_steps=50,
    logging_steps=10,
)

# Initialize Trainer: LoRA-wrapped model, tokenized splits, and the accuracy metric
trainer = Trainer(
    args=training_args,
    model=model,
    compute_metrics=compute_metrics,
    train_dataset=train_dataset,
    eval_dataset=val_dataset,
)

import wandb

## Update the project and run names for every new project or run.
# Starts the Weights & Biases run before training so that the artifact logged
# by save_lora_params() has an active run to attach to.
wandb.init(project="LoRA_400step_500r_2class", name="LoRA_Rank_500")
# Define a function to save and log LoRA parameters
def save_lora_params(model, artifact_name="lora_params"):
    """Save the model's LoRA tensors to ``<artifact_name>.pt`` and log the file to W&B.

    Args:
        model: Object whose ``named_parameters()`` are scanned; every parameter
            with "lora_" in its name is kept.
        artifact_name: Base name of the local file and name of the W&B artifact.
    """
    # Collect a detached copy of every LoRA-specific parameter
    lora_state = {}
    for param_name, tensor in model.named_parameters():
        if "lora_" not in param_name:
            continue
        lora_state[param_name] = tensor.clone().detach()

    # Write the collected tensors to a local file
    file_path = f"{artifact_name}.pt"
    torch.save(lora_state, file_path)
    print(f"LoRA parameters saved locally to {file_path}")

    # Attach the file to a W&B artifact of type "model" and log it
    lora_artifact = wandb.Artifact(artifact_name, type="model")
    lora_artifact.add_file(file_path)
    wandb.log_artifact(lora_artifact)
    print(f"LoRA parameters logged to W&B as artifact: {artifact_name}")

# Start training (the step budget and eval/save cadence come from training_args)
trainer.train()

# After training, write the LoRA parameters to lora_params.pt and log that file to W&B
save_lora_params(model, artifact_name="lora_params")


Train label distribution: {0, 1}
Validation label distribution: {0, 1}
{'labels': 0, 'text': 'My fault by trying to be cheap and buying wrong cord but it did work a little', 'input_ids': [101, 2026, 6346, 2011, 2667, 2000, 2022, 10036, 1998, 9343, 3308, 11601, 2021, 2009, 2106, 2147, 1037, 2210, 102, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,

Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
max_steps is given, it will override any value given in num_train_epochs


VBox(children=(Label(value='105.533 MB of 105.533 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
eval/accuracy,▁▆▇█████
eval/loss,█▅▂▁▁▁▁▁
eval/runtime,▆▁▄█▇▁▂▅
eval/samples_per_second,▃█▅▁▂█▇▄
eval/steps_per_second,▂█▅▁▂█▇▄
train/epoch,▁▁▁▂▂▂▂▂▂▃▃▃▃▃▄▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███
train/global_step,▁▁▁▂▂▂▂▂▂▃▃▃▃▃▄▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███
train/grad_norm,▁▆▄▂▃▄▄▄▄▄▅▃▃▃▄▂▃▃▄▄▃▃▅▂▃▂▆▅▃▂▄▁█▂▅▂▄▅▂▄
train/learning_rate,███▇▇▇▇▇▇▆▆▆▆▆▅▅▅▅▅▅▄▄▄▄▄▄▃▃▃▃▃▂▂▂▂▂▂▁▁▁
train/loss,██▇█▇▇█▆▆▅▄▄▃▄▃▃▄▄▃▃▂▃▃▁▃▂▂▂▃▃▂▁▂▂▁▂▃▂▃▁

0,1
eval/accuracy,0.89782
eval/loss,0.26968
eval/runtime,52.6317
eval/samples_per_second,108.034
eval/steps_per_second,13.509
total_flos,2227522791014400.0
train/epoch,0.12571
train/global_step,400.0
train/grad_norm,6.09651
train/learning_rate,0.0


Step,Training Loss,Validation Loss,Accuracy
50,0.6254,0.595935,0.674288
100,0.4524,0.435673,0.811819
150,0.3541,0.306289,0.875132
200,0.2482,0.280283,0.887795
250,0.2928,0.265788,0.898171
300,0.3528,0.262163,0.899402
350,0.2625,0.26946,0.895005
400,0.1964,0.262759,0.898874


LoRA parameters saved locally to lora_params.pt
LoRA parameters logged to W&B as artifact: lora_params
