In [1]:
import transformers
from transformers import TrainingArguments
# Report the installed transformers release and the parameter names accepted by TrainingArguments.
print("Transformers version:", transformers.__version__)
# `co_varnames` starts with the parameter names but then continues with every other local
# variable of the function, so keep only the leading parameter slots (positional + keyword-only;
# *args/**kwargs names, if a signature had them, are not counted here).
init_code = TrainingArguments.__init__.__code__
param_count = init_code.co_argcount + init_code.co_kwonlyargcount
print("TrainingArguments args:", init_code.co_varnames[:param_count])


Transformers version: 4.51.3
TrainingArguments args: ('self', 'output_dir', 'overwrite_output_dir', 'do_train', 'do_eval', 'do_predict', 'eval_strategy', 'prediction_loss_only', 'per_device_train_batch_size', 'per_device_eval_batch_size', 'per_gpu_train_batch_size', 'per_gpu_eval_batch_size', 'gradient_accumulation_steps', 'eval_accumulation_steps', 'eval_delay', 'torch_empty_cache_steps', 'learning_rate', 'weight_decay', 'adam_beta1', 'adam_beta2', 'adam_epsilon', 'max_grad_norm', 'num_train_epochs', 'max_steps', 'lr_scheduler_type', 'lr_scheduler_kwargs', 'warmup_ratio', 'warmup_steps', 'log_level', 'log_level_replica', 'log_on_each_node', 'logging_dir', 'logging_strategy', 'logging_first_step', 'logging_steps', 'logging_nan_inf_filter', 'save_strategy', 'save_steps', 'save_total_limit', 'save_safetensors', 'save_on_each_node', 'save_only_model', 'restore_callback_states_from_checkpoint', 'no_cuda', 'use_cpu', 'use_mps_device', 'seed', 'data_seed', 'jit_mode_eval', 'use_ipex', 'bf16'

In [7]:
# 🚀 Fine-Tune DistilBERT on the IMDB Dataset (GPU Enabled with Sarcasm Heuristic)

import warnings
warnings.filterwarnings("ignore")

# ✅ 1. Imports
from transformers import (
    AutoTokenizer,
    AutoModelForSequenceClassification,
    Trainer,
    TrainingArguments,
    DataCollatorWithPadding,
    pipeline
)
from datasets import load_dataset
import torch

# ✅ 2. Load Dataset
# Fetch IMDB and shuffle it with a fixed seed so that later `select(range(n))` slices are
# reproducible samples.
dataset = load_dataset("imdb").shuffle(seed=42)

# ✅ 3. Tokenizer & Tokenization
tokenizer = AutoTokenizer.from_pretrained("distilbert-base-uncased")

def tokenize_function(example):
    """Tokenize a batch of reviews, truncating each one to at most 256 tokens.

    Args:
        example: A batch from ``dataset.map(..., batched=True)``; its ``"text"`` entry
            holds the review strings.

    Returns:
        The tokenizer output for the batch (unpadded, variable-length sequences).

    Padding is deliberately not applied here. The ``DataCollatorWithPadding`` handed to the
    Trainer pads each batch to the length of its longest member, so padding every review
    to 256 tokens up front would only add wasted computation and storage.
    """
    return tokenizer(example["text"], truncation=True, max_length=256)

tokenized_datasets = dataset.map(tokenize_function, batched=True)

# ✅ 4. Model
# Binary classification head on top of DistilBERT. Place the model on the GPU when one is
# available and fall back to the CPU otherwise; a hard-coded "cuda" raises on CPU-only
# machines even though the rest of the notebook supports running without a GPU.
model = AutoModelForSequenceClassification.from_pretrained(
    "distilbert-base-uncased", num_labels=2
).to("cuda" if torch.cuda.is_available() else "cpu")

# ✅ 5. Data Collator
# Pads each batch dynamically using the tokenizer's padding settings.
data_collator = DataCollatorWithPadding(tokenizer=tokenizer)

# ✅ 6. Training Arguments
# Collected as a plain dict first so the run configuration can be inspected or tweaked
# before the TrainingArguments object is built.
training_config = dict(
    # Where checkpoints and logs are written
    output_dir="./results",
    logging_dir="./logs",
    # Evaluate and checkpoint once per epoch, keeping at most two checkpoints
    eval_strategy="epoch",
    save_strategy="epoch",
    save_total_limit=2,
    load_best_model_at_end=True,
    # Batch sizes and schedule
    per_device_train_batch_size=8,
    per_device_eval_batch_size=8,
    num_train_epochs=2,
    # Log every 10 steps; no external experiment tracker
    logging_steps=10,
    report_to="none",
)
training_args = TrainingArguments(**training_config)

# ✅ 7. Trainer
# Train on the first 2,000 rows of the (already shuffled) train split and evaluate on the
# first 1,000 rows of the test split to keep the run short.
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=tokenized_datasets["train"].select(range(2000)),
    eval_dataset=tokenized_datasets["test"].select(range(1000)),
    # `tokenizer=` is deprecated in recent transformers releases (including the 4.51.3
    # used here) and slated for removal; `processing_class=` is its replacement.
    processing_class=tokenizer,
    data_collator=data_collator,
)

# ✅ 8. Train
trainer.train()

# ✅ 9. Evaluate
# Run evaluation on the Trainer's eval dataset and show the resulting metrics dict.
results = trainer.evaluate()
print("\nEvaluation Results:", results)

# ✅ 10. Save Model
# Write the model and the tokenizer into the same directory so it can be reloaded as a unit.
export_dir = "./fine-tuned-distilbert-imdb"
for save in (trainer.save_model, tokenizer.save_pretrained):
    save(export_dir)

# ✅ 11. Inference Setup
# Build a sentiment pipeline from the saved fine-tuned directory; use device 0 when CUDA
# is available, otherwise -1.
saved_model_dir = "./fine-tuned-distilbert-imdb"
device_index = 0 if torch.cuda.is_available() else -1
classifier = pipeline(
    "sentiment-analysis",
    model=saved_model_dir,
    tokenizer=saved_model_dir,
    device=device_index,
)

# ✅ 12. Device Info
# Describe the GPU in use, or warn when no CUDA device is available.
if not torch.cuda.is_available():
    print("\n⚠️ Running on CPU. Consider enabling a GPU for better performance.")
else:
    gpu_name = torch.cuda.get_device_name(0)
    print("\n✅ Using GPU:", gpu_name)
    print("   CUDA Version:", torch.version.cuda)
    print("   cuDNN Enabled:", torch.backends.cudnn.enabled)

# ✅ 13. Label Mapping
# Translate the pipeline's generic label names into readable sentiment names.
label_map = dict(LABEL_0="Negative", LABEL_1="Positive")

# ✅ 14. Simple Sarcasm Heuristic
def is_potentially_sarcastic(text):
    """Return True when ``text`` contains a phrase commonly used sarcastically.

    This is a plain case-insensitive substring check against a fixed cue list, not a
    model: it flags phrasing only and says nothing about the actual sentiment.

    Args:
        text: The sentence or review to inspect.

    Returns:
        bool: True if any cue phrase occurs in the lower-cased text, else False.
    """
    sarcastic_cues = (
        "oh great", "wow", "just what i wanted", "how amazing", "i love it when",
        "totally not", "what a surprise", "can't wait", "of course", "brilliant idea",
        "thanks a lot", "as expected", "another one", "sure thing",
    )
    # Fold typographic apostrophes (’ and ‘) to the ASCII one so that both "can't wait"
    # and "can’t wait" match the same cue; previously only the curly form matched.
    normalized = text.lower().replace("\u2019", "'").replace("\u2018", "'")
    return any(cue in normalized for cue in sarcastic_cues)

# ✅ 15. Inference with Sarcasm Flag
# A mix of sincere and sarcastic-sounding sentences to exercise both the classifier and
# the phrase heuristic.
examples = [
    "This movie was absolutely fantastic!",
    "Wow... this movie was so amazing, I fell asleep in 10 minutes.",
    "Thanks a lot for ruining my day.",
    "The visuals were breathtaking and the story was gripping.",
    "Of course, the plot twist was so obvious, not."
]

for text in examples:
    # The pipeline returns a list with one prediction dict per input.
    prediction = classifier(text)[0]
    raw_label = prediction["label"]
    # Fall back to the raw label if it is not one of the mapped names.
    sentiment = label_map.get(raw_label, raw_label)
    print(f"\n🔍 {text}")
    print(f" → Sentiment: {sentiment} ({prediction['score']:.2%})")
    if is_potentially_sarcastic(text):
        print(" ⚠️  Potential sarcasm detected based on phrasing!")


Some weights of DistilBertForSequenceClassification were not initialized from the model checkpoint at distilbert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight', 'pre_classifier.bias', 'pre_classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss
1,0.3617,0.360662
2,0.3257,0.473392



Evaluation Results: {'eval_loss': 0.3606622517108917, 'eval_runtime': 4.3513, 'eval_samples_per_second': 229.816, 'eval_steps_per_second': 28.727, 'epoch': 2.0}


Device set to use cuda:0



✅ Using GPU: NVIDIA GeForce RTX 4070 Laptop GPU
   CUDA Version: 12.1
   cuDNN Enabled: True

🔍 This movie was absolutely fantastic!
 → Sentiment: Positive (94.98%)

🔍 Wow... this movie was so amazing, I fell asleep in 10 minutes.
 → Sentiment: Positive (91.53%)
 ⚠️  Potential sarcasm detected based on phrasing!

🔍 Thanks a lot for ruining my day.
 → Sentiment: Positive (69.67%)
 ⚠️  Potential sarcasm detected based on phrasing!

🔍 The visuals were breathtaking and the story was gripping.
 → Sentiment: Positive (94.83%)

🔍 Of course, the plot twist was so obvious, not.
 → Sentiment: Negative (83.68%)
 ⚠️  Potential sarcasm detected based on phrasing!
