In [None]:
# Install necessary libraries
# !pip install transformers datasets

import torch
from transformers import AutoTokenizer, AutoModelForSequenceClassification, Trainer, TrainingArguments
from datasets import load_dataset, DatasetDict

# Checkpoint that both the tokenizer and the classifier are restored from
# (per its name: DistilBERT, uncased, fine-tuned on SST-2).
model_name = "distilbert-base-uncased-finetuned-sst-2-english"

# Demonstration data: the IMDb dataset from the Hugging Face Hub.
# Swap this call for your own dataset when adapting the script.
raw_datasets = load_dataset("imdb")

# Restore the sequence-classification model and its matching tokenizer
model = AutoModelForSequenceClassification.from_pretrained(model_name)
tokenizer = AutoTokenizer.from_pretrained(model_name)

# Preprocessing step applied to the dataset
def preprocess_function(examples):
    """Tokenize the ``'text'`` entry of ``examples``.

    The module-level ``tokenizer`` is called with truncation enabled and
    ``padding="max_length"`` at ``max_length=128``.

    Args:
        examples: Mapping that provides the raw inputs under the ``'text'``
            key (a batch of rows when used with ``map(..., batched=True)``).

    Returns:
        The tokenizer's output for the given texts.
    """
    tokenizer_options = {
        "truncation": True,
        "padding": "max_length",
        "max_length": 128,
    }
    return tokenizer(examples['text'], **tokenizer_options)

# Apply the preprocessing function to the loaded datasets, in batches
tokenized_datasets = raw_datasets.map(preprocess_function, batched=True)

# IMDb already provides "train" and "test" splits, so no manual splitting is
# needed: collect exactly those two splits into a new DatasetDict.
dataset = DatasetDict(
    {split: tokenized_datasets[split] for split in ("train", "test")}
)

# Training configuration, grouped by concern
training_args = TrainingArguments(
    # --- Output locations and reporting ---
    output_dir="./results",             # Model checkpoints are written here
    logging_dir="./logs",               # Log files are written here
    report_to="none",                   # No external dashboards such as WandB
    # --- Evaluation / checkpoint schedule (both once per epoch) ---
    evaluation_strategy="epoch",
    save_strategy="epoch",
    save_total_limit=2,                 # Cap on the number of saved checkpoints
    # --- Optimization hyperparameters ---
    num_train_epochs=3,
    learning_rate=5e-5,
    weight_decay=0.01,
    per_device_train_batch_size=16,
    per_device_eval_batch_size=64,
    # --- Best-model selection ---
    load_best_model_at_end=True,        # Reload the best checkpoint after training
    metric_for_best_model="accuracy",   # Key used to rank checkpoints
)

# Define a function to compute metrics
import numpy as np


def compute_metrics(eval_pred):
    """Compute classification accuracy for the Trainer's evaluation loop.

    Accuracy is calculated directly with NumPy. ``datasets.load_metric`` was
    deprecated and later removed from the ``datasets`` package, so importing
    it fails on current releases; computing the metric locally avoids that
    without adding a new dependency.

    Args:
        eval_pred: A ``(logits, labels)`` pair. The predicted class of each
            row is the argmax of ``logits`` over its last axis.

    Returns:
        dict: ``{"accuracy": value}``, where ``value`` is the fraction of
        predictions equal to the corresponding label, as a Python float.
        The key matches ``metric_for_best_model="accuracy"``.
    """
    logits, labels = eval_pred
    predictions = np.argmax(logits, axis=-1)
    # Element-wise comparison yields booleans; their mean is the accuracy.
    # float() turns the NumPy scalar into a plain, JSON-serializable float.
    return {"accuracy": float(np.mean(predictions == np.asarray(labels)))}

# Wire the model, configuration, data, and metric function into a Trainer
trainer = Trainer(
    args=training_args,
    model=model,
    tokenizer=tokenizer,
    compute_metrics=compute_metrics,  # Called on (logits, labels) at evaluation
    eval_dataset=dataset["test"],     # Split used for evaluation
    train_dataset=dataset["train"],   # Split used for training
)

# Run the fine-tuning loop
trainer.train()

# Persist the model first, then the tokenizer, into the same directory
for artifact in (model, tokenizer):
    artifact.save_pretrained("./fine_tuned_model")

# Test your fine-tuned model
from transformers import pipeline

# Build an inference pipeline from the files saved in ./fine_tuned_model
sentiment_analysis = pipeline("sentiment-analysis", model="./fine_tuned_model")

# Example input
texts = ["I love this movie!", "The product was terrible and I hate it."]

# Perform sentiment analysis
results = sentiment_analysis(texts)

# Pair every input with its own prediction so the report shows the actual
# text next to its label. Previously the predicted label was printed under
# the "Text:" heading and the input text never appeared.
for text, result in zip(texts, results):
    print(
        f"Text: {text}, Label: {result['label']}, "
        f"Confidence: {result['score']:.4f}"
    )