In [None]:
# Install required libraries
pip install torch transformers datasets

# Verify GPU availability
import torch
print(torch.cuda.is_available())

In [None]:
from transformers import AutoModelForSequenceClassification, AutoTokenizer
from datasets import load_dataset

# Checkpoint shared by the tokenizer and the classifier
model_name = "distilbert-base-uncased"

# The tokenizer must come from the same checkpoint as the model weights
tokenizer = AutoTokenizer.from_pretrained(model_name)

# Sequence-classification head configured with two output labels
model = AutoModelForSequenceClassification.from_pretrained(
    model_name,
    num_labels=2,
)

In [None]:
# Fetch the IMDB dataset; its "train" and "test" splits are used below
dataset = load_dataset("imdb")

# Shuffle each split with a fixed seed so the sampled subset is reproducible
shuffled_train = dataset["train"].shuffle(seed=42)
shuffled_test = dataset["test"].shuffle(seed=42)

# Keep 2000 reviews in total (1600 train / 400 test) for faster training
train_dataset = shuffled_train.select(range(1600))
test_dataset = shuffled_test.select(range(400))

# Tokenize the dataset
def preprocess_function(examples):
    """Tokenize a batch of reviews into fixed-length model inputs.

    Args:
        examples: A batch from the dataset; only its "text" entry is read.

    Returns:
        Whatever the module-level ``tokenizer`` returns for those texts.
    """
    # padding="max_length" pads every row to the same length. With
    # padding=True each map batch is only padded to its own longest row, so
    # rows from different map batches could end up with different lengths and
    # fail to stack into a tensor at training time.
    return tokenizer(examples["text"], truncation=True, padding="max_length")

# Apply the tokenizer to both splits in batched mode (train first, then test)
tokenized_train, tokenized_test = (
    split.map(preprocess_function, batched=True)
    for split in (train_dataset, test_dataset)
)

In [None]:
from transformers import TrainingArguments, Trainer

# Hyperparameters for the fine-tuning run; checkpoints go to output_dir.
training_args = TrainingArguments(
    output_dir="./sentiment_model",
    # `eval_strategy` is the current name of this option; the older
    # `evaluation_strategy` spelling was deprecated and then removed in
    # recent transformers releases, where it raises a TypeError.
    eval_strategy="epoch",
    learning_rate=2e-5,
    per_device_train_batch_size=16,
    per_device_eval_batch_size=16,
    num_train_epochs=3,
    weight_decay=0.01,
)

# The Trainer ties together the model, the arguments above and both splits;
# the eval split is scored at the end of every epoch.
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=tokenized_train,
    eval_dataset=tokenized_test,
)

In [None]:
# Run the fine-tuning loop configured on the trainer
trainer.train()

# Persist the fine-tuned weights and the tokenizer files side by side so the
# directory can be reloaded later as a single checkpoint
for artifact in (model, tokenizer):
    artifact.save_pretrained("./sentiment_model")

In [None]:
import numpy as np
from sklearn.metrics import classification_report

# Run the trained model over the test split
predictions = trainer.predict(tokenized_test)

# Pick the highest-scoring class index for each review
class_scores = predictions.predictions
preds = np.argmax(class_scores, axis=1)

# Ground-truth labels taken from the tokenized test split
labels = tokenized_test["label"]

# Per-class precision / recall / F1, with label 0 shown as "Negative" and 1 as "Positive"
report = classification_report(
    labels,
    preds,
    target_names=["Negative", "Positive"],
)
print(report)