### HuggingFace - Trainer - Fine-Tuning - Accelerate


---


In [9]:
import torch
from accelerate import Accelerator
from datasets import load_dataset
from sklearn.metrics import accuracy_score
from transformers import (
    AutoModelForSequenceClassification,
    AutoTokenizer,
    Trainer,
    TrainingArguments,
)

In [2]:
# 1. Load the dataset
dataset = load_dataset("imdb")

# 2. Load the tokenizer
model_name = "distilbert-base-uncased-finetuned-sst-2-english"
tokenizer = AutoTokenizer.from_pretrained(model_name)


# 3. Define the tokenization function and compute_metrics function
def tokenize_function(examples):
    return tokenizer(examples["text"], padding=True, truncation=True, max_length=512)


# Define a compute_metrics function
def compute_metrics(p):
    predictions, labels = p
    predictions = predictions.argmax(axis=-1)  # Convert logits to predicted class ids
    accuracy = accuracy_score(labels, predictions)
    return {"accuracy": accuracy}


# 4. Tokenize only the 'train' and 'test' splits
tokenized_train = dataset["train"].map(tokenize_function, batched=True)
tokenized_test = dataset["test"].map(tokenize_function, batched=True)

In [3]:
# 5. Load the model
model = AutoModelForSequenceClassification.from_pretrained(model_name)

In [4]:
# Check if GPU is available
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"Using device: {device}")

model = model.to(device)

Using device: cuda


In [5]:
# 6. Select the subset of data from 10000 to 15000 /\ 0-12.5k = 0, 12.5k-25k = 1 /\
train_subset = tokenized_train.select(
    range(10000, 15000)
)  # Train dataset from 10000-15000
test_subset = tokenized_test.select(
    range(10000, 15000)
)  # Test dataset from 10000-15000

In [6]:
# 7. Set the training parameters with optimizations for speed
training_args = TrainingArguments(
    output_dir="./trainer_results",  # Directory to store output files
    eval_strategy="epoch",  # Perform evaluation after each epoch
    learning_rate=2e-5,  # Learning rate
    per_device_train_batch_size=16,  # Training batch size
    per_device_eval_batch_size=32,  # Evaluation batch size
    num_train_epochs=3,  # Number of epochs for training
    weight_decay=0.01,  # Weight decay to avoid overfitting
    gradient_accumulation_steps=4,  # Accumulate gradients to simulate a larger batch size
    dataloader_num_workers=4,  # Use multiple workers for faster data loading
    fp16=True,  # Use mixed precision to speed up training on GPU
    # logging_dir="./logs",  # Optional: Directory to store logs
    # logging_steps=50,  # Log every 50 steps for faster feedback
    # load_best_model_at_end=True,  # Optional: Load the best model after training ends
    # save_strategy="epoch",  # Save the model after each epoch
    # evaluation_strategy="epoch",  # Evaluate after each epoch
)

In [7]:
# 8. Create the Trainer object
trainer = Trainer(
    model=model,  # The model to be trained
    args=training_args,  # The training arguments
    train_dataset=train_subset,  # The training dataset
    eval_dataset=train_subset,  # The evaluation dataset
    compute_metrics=compute_metrics,  # Add the compute_metrics function
)

In [8]:
# 9. Initialize the Accelerator
accelerator = Accelerator()

# 10. Prepare the trainer for accelerator
trainer = accelerator.prepare(trainer)

In [10]:
# 11. Fine-tune the model
trainer.train()

# 12. Evaluate the model on the test set
results = trainer.evaluate()

Epoch,Training Loss,Validation Loss,Accuracy
1,No log,0.153273,0.945
2,No log,0.080983,0.978


In [13]:
# 13. Print the results
print(f"Test Loss: {results['eval_loss']:.2f}")
print(f"Test Accuracy: {results['eval_accuracy']:.2f}")

Test Loss: 0.08
Test Accuracy: 0.98


In [17]:
# 14. Save the model
model.save_pretrained("./fine_tuned_model")
tokenizer.save_pretrained("./fine_tuned_model")

('./fine_tuned_model\\tokenizer_config.json',
 './fine_tuned_model\\special_tokens_map.json',
 './fine_tuned_model\\vocab.txt',
 './fine_tuned_model\\added_tokens.json',
 './fine_tuned_model\\tokenizer.json')

In [20]:
# 15. Make a prediction with the model
text = "I really love this movie! It's amazing."

# Tokenize the input text
inputs = tokenizer(
    text, return_tensors="pt", padding=True, truncation=True, max_length=512
)

# Move inputs to the correct device
inputs = {key: value.to(device) for key, value in inputs.items()}

# Make a prediction with the model
with torch.no_grad():
    logits = model(**inputs).logits

# Determine the predicted class
predicted_class_id = logits.argmax().item()

# Print the predicted class (0 = Negative, 1 = Positive)
print(f"Predicted Class: {"Positive" if predicted_class_id == 1 else "Negative"}")

Predicted Class: Positive
