## Finetuning llama chat model for classification

In [1]:
import torch
print(torch.cuda.is_available())  # Should return True if CUDA is enabled
print(torch.__version__)  # Check PyTorch version
print(torch.version.cuda)  # Check CUDA version

True
2.6.0+cu124
12.4


In [2]:
import torch
torch.cuda.empty_cache()
torch.cuda.memory_allocated(), torch.cuda.memory_reserved()


(0, 0)

### Load Pretrained llama 2 7b-chat

In [1]:
from transformers import LlamaForSequenceClassification, LlamaTokenizer

model_name = "meta-llama/Llama-2-7b-chat-hf"

# Load tokenizer
tokenizer = LlamaTokenizer.from_pretrained(model_name)

# Add the custom separator token
tokenizer.add_special_tokens({'additional_special_tokens': ['[SEP_CUSTOM]']})

# Save the updated tokenizer for consistency
tokenizer.save_pretrained("updated_tokenizer")
tokenizer.pad_token = tokenizer.eos_token  # Set pad token

# Load model for classification (add classification head)
num_labels = 2  # Binary classification (factual or non-factual)
model = LlamaForSequenceClassification.from_pretrained(model_name, num_labels=num_labels)

model.config.pad_token_id = tokenizer.pad_token_id
model.resize_token_embeddings(len(tokenizer))


  from .autonotebook import tqdm as notebook_tqdm
Loading checkpoint shards: 100%|██████████| 2/2 [00:02<00:00,  1.04s/it]
Some weights of LlamaForSequenceClassification were not initialized from the model checkpoint at meta-llama/Llama-2-7b-chat-hf and are newly initialized: ['score.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
The new embeddings will be initialized from a multivariate normal distribution that has old embeddings' mean and covariance. As described in this article: https://nlp.stanford.edu/~johnhew/vocab-expansion.html. To disable this, use `mean_resizing=False`


Embedding(32001, 4096)

### Load the Xsum dataset

In [2]:
from datasets import Dataset

dataset = Dataset.load_from_disk("backup/xsum_factual_combined_with_gold")
print(dataset)

# Split the dataset into 80% test and 20% validation
split_datasets = dataset.train_test_split(test_size=0.2, seed=42)

# Access the test and validation datasets
train_dataset = split_datasets['train']  # 80% (test)
validation_dataset = split_datasets['test']  # 20% (validation)

# Verify the splits
print(f"Train dataset size: {len(train_dataset)}")
print(f"Validation dataset size: {len(validation_dataset)}")

Dataset({
    features: ['id', 'doc', 'summary', 'is_factual'],
    num_rows: 11194
})
Train dataset size: 8955
Validation dataset size: 2239


### Tokenize and filter data

In [4]:
def preprocess_function(examples):
    # Combine document and summary with [SEP]
    inputs = [doc + "[SEP_CUSTOM]" + summary for doc, summary in zip(examples['doc'], examples['summary'])]
    
    # Tokenize the inputs
    model_inputs = tokenizer(inputs, truncation=True, padding="max_length", max_length=512)
    
    # Add labels to the tokenized inputs
    model_inputs["labels"] = examples["is_factual"]
    return model_inputs

# Tokenize datasets
tokenized_train = train_dataset.map(preprocess_function, batched=True)
tokenized_val = validation_dataset.map(preprocess_function, batched=True)

In [5]:
def filter_invalid_labels(dataset):
    # Remove entries where the label is -1
    return dataset.filter(lambda example: example['labels'] != -1)

tokenized_train = filter_invalid_labels(tokenized_train)
tokenized_val = filter_invalid_labels(tokenized_val)

### Setup LoRA parameters through peft

In [6]:
for param in model.parameters():
    param.requires_grad = False

In [7]:
from peft import LoraConfig, get_peft_model

lora_config = LoraConfig(
    r=8,
    lora_alpha=16,
    target_modules=["q_proj", "k_proj", "v_proj", "o_proj"],  # Apply LoRA to all attention layers
    lora_dropout=0.05,
    bias="none",
    task_type="SEQ_CLS",
)

peft_model = get_peft_model(model, lora_config)
peft_model.print_trainable_parameters()


trainable params: 8,396,800 || all params: 6,615,752,704 || trainable%: 0.1269


### Training arguments

In [8]:
from transformers import TrainingArguments

training_args = TrainingArguments(
    per_device_train_batch_size=4,  # Reduce batch size
    logging_steps=100,
    output_dir="./llama_chat_xsum",
    evaluation_strategy="epoch",
    learning_rate=1e-5,
    num_train_epochs=3,
    weight_decay=0.01,
    logging_dir="./logs",
    save_strategy="epoch",
    save_total_limit=1
)

# model.gradient_checkpointing_enable()  # Reduce memory usage




In [9]:
import numpy as np
import evaluate

# Load metrics
accuracy_metric = evaluate.load("accuracy")
f1_metric = evaluate.load("f1")

# Define compute_metrics for validation
def compute_metrics(eval_pred):
    logits, labels = eval_pred
    predictions = np.argmax(logits, axis=-1)
    accuracy = accuracy_metric.compute(predictions=predictions, references=labels)
    f1 = f1_metric.compute(predictions=predictions, references=labels, average="macro")
    return {
        "accuracy": accuracy["accuracy"],
        "f1": f1["f1"]
    }

In [10]:
from transformers import Trainer, TrainingArguments

trainer = Trainer(
    model=peft_model,
    args=training_args,
    train_dataset=tokenized_train,
    eval_dataset=tokenized_val,
    tokenizer=tokenizer,
    compute_metrics=compute_metrics,
)
trainer.train()


  trainer = Trainer(


Epoch,Training Loss,Validation Loss,Accuracy,F1
1,0.5835,0.557268,0.718121,0.714578
2,0.4175,0.452659,0.750336,0.747915
3,0.3249,0.439629,0.761969,0.758626




TrainOutput(global_step=6696, training_loss=0.5304888687680699, metrics={'train_runtime': 24648.3216, 'train_samples_per_second': 1.086, 'train_steps_per_second': 0.272, 'total_flos': 5.334425708821217e+17, 'train_loss': 0.5304888687680699, 'epoch': 3.0})

## Load the local model

In [22]:
from transformers import AutoModelForSequenceClassification, AutoTokenizer
from peft import PeftModel
import torch

# Path where your model was saved
model_path = "./llama_chat_xsum/checkpoint-6696"


# Load base model
base_model_name = "meta-llama/Llama-2-7b-chat-hf"  # Use base model name
base_model = AutoModelForSequenceClassification.from_pretrained(base_model_name)
tokenizer = AutoTokenizer.from_pretrained(base_model_name)
tokenizer.add_special_tokens({'additional_special_tokens': ['[SEP_CUSTOM]']})
tokenizer.pad_token = tokenizer.eos_token 
base_model.resize_token_embeddings(len(tokenizer))


# Load fine-tuned adapter
model = PeftModel.from_pretrained(base_model, model_path)


tokenizer.pad_token = tokenizer.eos_token 
model.eval()


Loading checkpoint shards: 100%|██████████| 2/2 [00:02<00:00,  1.25s/it]
Some weights of LlamaForSequenceClassification were not initialized from the model checkpoint at meta-llama/Llama-2-7b-chat-hf and are newly initialized: ['score.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


PeftModelForSequenceClassification(
  (base_model): LoraModel(
    (model): LlamaForSequenceClassification(
      (model): LlamaModel(
        (embed_tokens): Embedding(32001, 4096)
        (layers): ModuleList(
          (0-31): 32 x LlamaDecoderLayer(
            (self_attn): LlamaAttention(
              (q_proj): lora.Linear(
                (base_layer): Linear(in_features=4096, out_features=4096, bias=False)
                (lora_dropout): ModuleDict(
                  (default): Dropout(p=0.05, inplace=False)
                )
                (lora_A): ModuleDict(
                  (default): Linear(in_features=4096, out_features=8, bias=False)
                )
                (lora_B): ModuleDict(
                  (default): Linear(in_features=8, out_features=4096, bias=False)
                )
                (lora_embedding_A): ParameterDict()
                (lora_embedding_B): ParameterDict()
                (lora_magnitude_vector): ModuleDict()
              )
          

### Test classification on a sample

In [None]:
text = "The stock market saw a major crash due to economic instability.[SEP_CUSTOM] The market crashed significantly."
inputs = tokenizer(text, return_tensors="pt", padding=True, truncation=True)

with torch.no_grad():
    outputs = model(**inputs)

logits = outputs.logits
print(logits)
predicted_class = logits.argmax(dim=-1).item()
print(f"Predicted Class: {predicted_class}")

tensor([[-0.2060,  0.0129]])
Predicted Class: 1


In [None]:
tokens = tokenizer.encode("Document text [SEP_CUSTOM] Summary text", add_special_tokens=True)
print(tokenizer.convert_ids_to_tokens(tokens))


['<s>', '▁Document', '▁text', '▁', '[SEP_CUSTOM]', '▁', '▁Sum', 'mary', '▁text']


: 