In [1]:
from transformers import (
    AutoTokenizer, AutoModelForSequenceClassification, Trainer, TrainingArguments
)
from datasets import Dataset
import numpy as np
 
# Toy sentiment corpus for binary classification.
# "text" and "label" are parallel lists: label[i] belongs to text[i].
data = {
    "text": [
        "The new iPhone looks amazing!",
        "The game was boring and unwatchable.",
        "I absolutely loved the movie!",
        "This laptop performs very poorly.",
        "Fantastic service and great food!",
    ],
    "label": [1, 0, 1, 0, 1],  # 1 = positive, 0 = negative
}

# Convert to a Hugging Face dataset and hold out 20% of the rows for
# evaluation. The split shuffles the rows first, so a fixed seed is passed to
# keep the train/test membership (and therefore the reported eval loss)
# reproducible from run to run.
dataset = Dataset.from_dict(data).train_test_split(test_size=0.2, seed=42)
 
# Load the tokenizer and the classifier from the same pretrained checkpoint
# so that the vocabulary matches the embedding table.
model_name = "distilbert-base-uncased"
tokenizer = AutoTokenizer.from_pretrained(model_name)
# num_labels=2 requests a two-way classification head (negative / positive).
model = AutoModelForSequenceClassification.from_pretrained(
    model_name,
    num_labels=2,
)
 
# Tokenization helper applied to the dataset via `map`
def tokenize(batch):
    """Tokenize the "text" column of a batch with the module-level tokenizer.

    Sequences are truncated and padded to the tokenizer's maximum length.
    Returns whatever the tokenizer call returns for the batch.
    """
    texts = batch["text"]
    return tokenizer(texts, truncation=True, padding="max_length")
 
# Run the tokenizer over every split, handing `tokenize` whole batches at once.
tokenized_ds = dataset.map(tokenize, batched=True)
# Expose only the listed columns, returned as torch tensors.
tokenized_ds.set_format(
    type="torch",
    columns=["input_ids", "attention_mask", "label"],
)
 
# Training configuration
training_args = TrainingArguments(
    # Where checkpoints and logs are written
    output_dir="./results",
    logging_dir="./logs",
    # Batch sizes and schedule length
    per_device_train_batch_size=2,
    per_device_eval_batch_size=2,
    num_train_epochs=3,
    # load_best_model_at_end needs the save and evaluation schedules to
    # match, so both are set to run once per epoch.
    # NOTE(review): newer transformers releases rename `evaluation_strategy`
    # to `eval_strategy` -- confirm against the installed version.
    save_strategy="epoch",
    evaluation_strategy="epoch",
    load_best_model_at_end=True,
)
 
# Bind the two splits to names, then hand everything to the Trainer
train_split = tokenized_ds["train"]
eval_split = tokenized_ds["test"]
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=train_split,
    eval_dataset=eval_split,
)

# Fine-tune the model; evaluation runs on `eval_split` per the training args
trainer.train()
 
# Inference on a single unseen sentence
sample = "I hated the user interface of this app."

# Switch off dropout explicitly so the prediction is deterministic, rather
# than relying on whatever mode the training loop left the model in.
model.eval()

# The Trainer may have placed the model on an accelerator; the inputs must be
# on the same device or the forward pass fails with a device mismatch.
tokens = tokenizer(
    sample, return_tensors="pt", truncation=True, padding=True
).to(model.device)
output = model(**tokens)

# Move the logits back to the CPU before converting: .numpy() is not
# available on accelerator tensors.
logits = output.logits.detach().cpu().numpy()
# logits has one row for the single input sentence; take its best class.
pred = int(np.argmax(logits, axis=-1)[0])
print("📝 Sample:", sample)
print("🔖 Predicted Sentiment:", "Positive" if pred == 1 else "Negative")

Some weights of DistilBertForSequenceClassification were not initialized from the model checkpoint at distilbert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight', 'pre_classifier.bias', 'pre_classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/4 [00:00<?, ? examples/s]

Map:   0%|          | 0/1 [00:00<?, ? examples/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

{'eval_loss': 0.7227716445922852, 'eval_runtime': 0.3608, 'eval_samples_per_second': 2.772, 'eval_steps_per_second': 2.772, 'epoch': 1.0}


  0%|          | 0/1 [00:00<?, ?it/s]

{'eval_loss': 0.6896909475326538, 'eval_runtime': 0.3993, 'eval_samples_per_second': 2.504, 'eval_steps_per_second': 2.504, 'epoch': 2.0}


  0%|          | 0/1 [00:00<?, ?it/s]

{'eval_loss': 0.6715867519378662, 'eval_runtime': 0.4122, 'eval_samples_per_second': 2.426, 'eval_steps_per_second': 2.426, 'epoch': 3.0}
{'train_runtime': 34.2921, 'train_samples_per_second': 0.35, 'train_steps_per_second': 0.175, 'train_loss': 0.638109008471171, 'epoch': 3.0}
📝 Sample: I hated the user interface of this app.
🔖 Predicted Sentiment: Negative
