Project: Sentiment classifer for Amazon reviews
Dataset: Amazon Polarity - https://huggingface.co/datasets/fancyzhx/amazon_polarity
Model: distilbert-base-uncased

In [1]:
%pip install torch numpy peft datasets transformers





[notice] A new release of pip available: 22.2.2 -> 24.3.1
[notice] To update, run: python.exe -m pip install --upgrade pip





In [2]:
from transformers import TrainingArguments, Trainer, AutoModelForSequenceClassification
from datasets import load_dataset
import torch
from peft import LoraConfig, get_peft_model
from transformers import Trainer, AutoTokenizer
import numpy as np


In [3]:
# get test and train data and tokenize 

dataset = load_dataset("amazon_polarity")

train_dataset = dataset['train'].shuffle(seed=42).select(range(1000))
test_dataset = dataset['test'].shuffle(seed=42).select(range(1000))

tokenizer = AutoTokenizer.from_pretrained('distilbert-base-uncased')

def process_text(example):
    return tokenizer(example["content"], truncation=True, padding="max_length")

tokenized_train_dataset = train_dataset.map(process_text, batched=True)
tokenized_test_dataset = test_dataset.map(process_text, batched=True)

In [4]:
# load pretrained model "distilbert-base-uncased"

model = AutoModelForSequenceClassification.from_pretrained('distilbert-base-uncased', 
id2label={0: "NEGATIVE", 1: "POSITIVE"},
label2id={"NEGATIVE": 0, "POSITIVE": 1},
num_labels=2)

# args for training 
training_args = TrainingArguments(
    output_dir="./results",
    per_device_eval_batch_size=4,
    per_device_train_batch_size=4,
    evaluation_strategy="epoch",
    num_train_epochs=5,
    do_eval=True,
    save_strategy='epoch',
)

def compute_metrics(eval_pred):
    predictions, labels = eval_pred
    predictions = np.argmax(predictions, axis=1)
    return {'accuracy': (predictions == labels).mean()}

# Initialize the Trainer
trainer = Trainer(
    model=model,
    args=training_args,
    eval_dataset=tokenized_test_dataset,
    compute_metrics=compute_metrics,
    train_dataset=tokenized_train_dataset

)

initial_evaluation = trainer.evaluate()
print("Initial Evaluation:", initial_evaluation) 


Some weights of DistilBertForSequenceClassification were not initialized from the model checkpoint at distilbert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight', 'pre_classifier.bias', 'pre_classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


  0%|          | 0/250 [00:00<?, ?it/s]

Initial Evaluation: {'eval_loss': 0.6921502947807312, 'eval_model_preparation_time': 0.001, 'eval_accuracy': 0.514, 'eval_runtime': 148.8218, 'eval_samples_per_second': 6.719, 'eval_steps_per_second': 1.68}


In [5]:
# turn model into peft model with lora, targeting classifer layer for training
config = LoraConfig(target_modules=["classifier"],)
lora_model = get_peft_model(model, config)
lora_model.print_trainable_parameters()

trainable params: 6,160 || all params: 66,961,170 || trainable%: 0.0092


In [6]:
# create trainer and save trained model 

ft_training_args = TrainingArguments(
    output_dir="./lora_results",
    per_device_eval_batch_size=8,
    per_device_train_batch_size=8,
    evaluation_strategy="epoch",
    num_train_epochs=5,
    do_eval=True,
    save_strategy='epoch',
)

ft_trainer = Trainer(
    model=lora_model,
    args=ft_training_args,
    eval_dataset=tokenized_test_dataset,
    compute_metrics=compute_metrics,
    train_dataset=tokenized_train_dataset

)

ft_trainer.train()

lora_model.save_pretrained("./fine-tuned")

  0%|          | 0/625 [00:00<?, ?it/s]

  0%|          | 0/125 [00:00<?, ?it/s]

{'eval_runtime': 149.3156, 'eval_samples_per_second': 6.697, 'eval_steps_per_second': 0.837, 'epoch': 1.0}


  0%|          | 0/125 [00:00<?, ?it/s]

{'eval_runtime': 160.8499, 'eval_samples_per_second': 6.217, 'eval_steps_per_second': 0.777, 'epoch': 2.0}


  0%|          | 0/125 [00:00<?, ?it/s]

{'eval_runtime': 162.8837, 'eval_samples_per_second': 6.139, 'eval_steps_per_second': 0.767, 'epoch': 3.0}
{'loss': 0.694, 'grad_norm': 0.11125710606575012, 'learning_rate': 1e-05, 'epoch': 4.0}


  0%|          | 0/125 [00:00<?, ?it/s]

{'eval_runtime': 167.7662, 'eval_samples_per_second': 5.961, 'eval_steps_per_second': 0.745, 'epoch': 4.0}


  0%|          | 0/125 [00:00<?, ?it/s]

{'eval_runtime': 160.0349, 'eval_samples_per_second': 6.249, 'eval_steps_per_second': 0.781, 'epoch': 5.0}
{'train_runtime': 2222.1711, 'train_samples_per_second': 2.25, 'train_steps_per_second': 0.281, 'train_loss': 0.69397978515625, 'epoch': 5.0}


In [7]:
#evaluate fine tuned/trained model 

final_model = AutoModelForSequenceClassification.from_pretrained("./fine-tuned")

final_trainer = Trainer(
	args=ft_training_args,
	compute_metrics=compute_metrics,
	model=final_model,
	eval_dataset=tokenized_test_dataset,
	train_dataset=tokenized_train_dataset,
)

final_evaluation = final_trainer.evaluate()
print("final evaluation:", final_evaluation)

Some weights of DistilBertForSequenceClassification were not initialized from the model checkpoint at distilbert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight', 'pre_classifier.bias', 'pre_classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


  0%|          | 0/125 [00:00<?, ?it/s]

final evaluation: {'eval_loss': 0.691296398639679, 'eval_model_preparation_time': 0.001, 'eval_accuracy': 0.538, 'eval_runtime': 159.521, 'eval_samples_per_second': 6.269, 'eval_steps_per_second': 0.784}


Inital evaluation eval accurancy: 0.514
Final evaluation eval accuracy: 0.538
