In [5]:
!pip install transformers torch datasets -q

In [6]:
from transformers import pipeline, AutoModelForSequenceClassification, AutoTokenizer, TrainingArguments, Trainer
from datasets import load_dataset
import torch

In [7]:
# Build the zero-shot classification pipeline on top of the BART-large MNLI checkpoint
classifier = pipeline(
    task="zero-shot-classification",
    model="facebook/bart-large-mnli",
)

Device set to use cuda:0


In [8]:
# Sentence to classify, plus the set of labels the pipeline chooses between
text = "The government passed a new policy on renewable energy incentives."
candidate_labels = [
    "politics",
    "environment",
    "sports",
    "technology",
]

# Score the sentence against every candidate label and display the raw result
result = classifier(text, candidate_labels=candidate_labels)
result

{'sequence': 'The government passed a new policy on renewable energy incentives.',
 'labels': ['environment', 'politics', 'technology', 'sports'],
 'scores': [0.3944079875946045,
  0.306060254573822,
  0.28545647859573364,
  0.014075360260903835]}

In [9]:
# Walk labels and scores in parallel, showing each score to four decimal places
label_scores = zip(result["labels"], result["scores"])
for label, score in label_scores:
    line = f"{label}: {score:.4f}"
    print(line)

environment: 0.3944
politics: 0.3061
technology: 0.2855
sports: 0.0141


In [10]:
# Take the first 2,000 training reviews and hold out 20% of them for testing.
# A fixed seed keeps the train/test membership identical across
# Restart & Run All, so downstream numbers are comparable between runs.
dataset = load_dataset("yelp_polarity", split="train[:2000]").train_test_split(
    test_size=0.2,
    seed=42,
)
dataset

README.md: 0.00B [00:00, ?B/s]

plain_text/train-00000-of-00001.parquet:   0%|          | 0.00/256M [00:00<?, ?B/s]

plain_text/test-00000-of-00001.parquet:   0%|          | 0.00/17.7M [00:00<?, ?B/s]

Generating train split:   0%|          | 0/560000 [00:00<?, ? examples/s]

Generating test split:   0%|          | 0/38000 [00:00<?, ? examples/s]

DatasetDict({
    train: Dataset({
        features: ['text', 'label'],
        num_rows: 1600
    })
    test: Dataset({
        features: ['text', 'label'],
        num_rows: 400
    })
})

In [11]:
# Load pretrained model and tokenizer
model_name = "distilbert-base-uncased"
tokenizer = AutoTokenizer.from_pretrained(model_name)

# The classification head is not part of the checkpoint and is newly
# initialized on load, so seed torch first to make that initialization
# (and therefore the fine-tuning run) repeatable.
torch.manual_seed(42)
model = AutoModelForSequenceClassification.from_pretrained(model_name, num_labels=2)

def tokenize_fn(batch):
    """Tokenize the ``text`` field of ``batch``.

    Sequences are truncated and padded to a fixed length of 128 tokens.
    Returns whatever the tokenizer produces for the batch.
    """
    encoded = tokenizer(
        batch["text"],
        max_length=128,
        padding="max_length",
        truncation=True,
    )
    return encoded


# Run the tokenizer over the dataset in batched mode
tokenized_datasets = dataset.map(function=tokenize_fn, batched=True)

tokenizer_config.json:   0%|          | 0.00/48.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/483 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/466k [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/268M [00:00<?, ?B/s]

Some weights of DistilBertForSequenceClassification were not initialized from the model checkpoint at distilbert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight', 'pre_classifier.bias', 'pre_classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/1600 [00:00<?, ? examples/s]

Map:   0%|          | 0/400 [00:00<?, ? examples/s]

In [12]:
# Training setup (quick run for demo): one epoch, no checkpoints, no external reporting
training_args = TrainingArguments(
    # where results and logs are written
    output_dir="./results",
    logging_dir="./logs",
    # schedule and batch sizes
    num_train_epochs=1,
    per_device_train_batch_size=8,
    per_device_eval_batch_size=8,
    # log every 10 steps
    logging_steps=10,
    # no checkpoint saving, no reporting integrations
    save_strategy="no",
    report_to="none",
)

# Name the two tokenized splits before handing them to the Trainer
train_split = tokenized_datasets["train"]
eval_split = tokenized_datasets["test"]

trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=train_split,
    eval_dataset=eval_split,
)

# Only training is run in this cell; the returned TrainOutput is displayed
trainer.train()

Step,Training Loss
10,0.6845
20,0.6086
30,0.5847
40,0.4538
50,0.4223
60,0.289
70,0.2775
80,0.4969
90,0.3419
100,0.4722


TrainOutput(global_step=200, training_loss=0.3802846425771713, metrics={'train_runtime': 19.8298, 'train_samples_per_second': 80.687, 'train_steps_per_second': 10.086, 'total_flos': 52986959462400.0, 'train_loss': 0.3802846425771713, 'epoch': 1.0})

In [14]:
# Compare the zero-shot pipeline with the fine-tuned classifier on one sentence
test_text = "I love how efficient the new electric cars are."

# Zero-shot classification
zs_result = classifier(test_text, ["positive", "negative"])
print("Zero-Shot Classification:")
print(zs_result)

# Few-shot (fine-tuned model)
inputs = tokenizer(test_text, return_tensors="pt", truncation=True, padding=True)
# Move inputs to the same device as the model
inputs = {k: v.to(model.device) for k, v in inputs.items()}

# The model may still be in training mode after trainer.train(); switch to
# eval mode so dropout is disabled and the prediction is deterministic.
model.eval()
# Inference needs no gradients; skipping autograd saves memory and time.
with torch.no_grad():
    outputs = model(**inputs)

probs = torch.nn.functional.softmax(outputs.logits, dim=-1)
print("\nFew-Shot Fine-Tuned Model:")
# NOTE(review): assumes class index 1 is "positive" and 0 is "negative" in
# yelp_polarity — confirm against the dataset's label feature.
print(f"Positive: {probs[0][1]:.4f}, Negative: {probs[0][0]:.4f}")

Zero-Shot Classification:
{'sequence': 'I love how efficient the new electric cars are.', 'labels': ['positive', 'negative'], 'scores': [0.9952255487442017, 0.004774483386427164]}

Few-Shot Fine-Tuned Model:
Positive: 0.9814, Negative: 0.0186
