# 5. Zero shot

Test zero-shot models prior to any training to measure their raw, out-of-the-box performance.

In [1]:
import os
import itertools
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import pickle
from datasets import Dataset

import mlflow

from sklearn.model_selection import train_test_split
from sklearn.metrics import (
    accuracy_score,
    precision_recall_fscore_support,
    classification_report,
    confusion_matrix,
    ConfusionMatrixDisplay,
)

from transformers import (
    AutoTokenizer,
    AutoModelForSequenceClassification,
    Trainer,
    TrainingArguments,
    EarlyStoppingCallback,
    pipeline,
)


import torch
# Report what PyTorch can see so the reader knows whether the later
# inference cells will run on a GPU or fall back to the CPU.
cuda_ready = torch.cuda.is_available()
print(f"CUDA available: {cuda_ready}")
print(f"GPU count: {torch.cuda.device_count()}")
# The device name is only queried when a CUDA device is actually present.
print(f"GPU name: {torch.cuda.get_device_name(0)}" if cuda_ready else "No GPU available.")


  from .autonotebook import tqdm as notebook_tqdm


CUDA available: True
GPU count: 1
GPU name: NVIDIA GeForce RTX 4050 Laptop GPU


In [2]:
# Load the cleaned comment dataset produced earlier in the project
# (path is relative to the notebook's working directory).
df = pd.read_csv("../data/cleaned.csv")

# Plain-text preview of the first rows.
print(df.head(5))

# Class balance: number of comments per label.
print(df.loc[:, 'label'].value_counts())

   id                                            comment   label  \
0   0  Sponsored by dbrand. Skin your Galaxy Fold7 (o...  others   
1   1  Finally a review of the zfold 7 from an actual...  others   
2   2  omg the script next to the selfie video... tha...  others   
3   3  I've said this before and I'll say it again. M...  others   
4   4  my man starts the video with a shot at a beaut...  others   

   label_encoded  
0              3  
1              3  
2              3  
3              3  
4              3  
label
others         927
design         162
camera         123
battery        117
price           66
performance     62
Name: count, dtype: int64


In [3]:
# Rich (HTML) preview of the first five rows; being the last expression
# of the cell, the frame is rendered by the notebook.
df.head(n=5)

Unnamed: 0,id,comment,label,label_encoded
0,0,Sponsored by dbrand. Skin your Galaxy Fold7 (o...,others,3
1,1,Finally a review of the zfold 7 from an actual...,others,3
2,2,omg the script next to the selfie video... tha...,others,3
3,3,I've said this before and I'll say it again. M...,others,3
4,4,my man starts the video with a shot at a beaut...,others,3


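## Zero-Shot Classification
Evaluate how well each pretrained NLI model can classify comments into the 6 categories without any fine-tuning. Note that the dictionary keys used in the code below do not match the checkpoints they load: `distilbert` points to a DeBERTa-v3-base zero-shot model, `bert` points to a DistilBERT MNLI model, and `roberta` points to RoBERTa-large MNLI.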

In [4]:
# Zero-shot evaluation: run each NLI checkpoint over every comment, store the
# top-ranked label per comment in a `pred_<key>` column of `df`, then print
# accuracy and a per-class report against the ground-truth `label` column.
candidate_labels = ['battery', 'camera', 'design', 'others', 'performance', 'price']

# NOTE(review): the keys do not describe the checkpoints they map to —
# "distilbert" loads a DeBERTa-v3 model and "bert" loads a DistilBERT model.
# The keys are left unchanged because they determine the `pred_<key>` column
# names and the headings printed below; rename them together with any
# downstream use.
models = {
    "distilbert": "MoritzLaurer/deberta-v3-base-zeroshot-v1",
    "bert": "typeform/distilbert-base-uncased-mnli",
    "roberta": "roberta-large-mnli"
}

results = {}

# Number of (comment, label) pairs sent through the model per forward pass.
ZERO_SHOT_BATCH_SIZE = 16

# Materialise the comments once; the same list is fed to every model.
comments = df['comment'].tolist()

classifier = None
for name, model_name in models.items():
    print(f"Running zero-shot classification using {name}...")

    # Release the reference to the previous model before loading the next one
    # so that two checkpoints do not need to be held at the same time.
    classifier = None
    if torch.cuda.is_available():
        torch.cuda.empty_cache()

    classifier = pipeline("zero-shot-classification", model=model_name)

    # Hand the pipeline the whole list instead of calling it once per row:
    # this lets it batch the work and avoids the "using the pipelines
    # sequentially on GPU" warning.
    outputs = classifier(comments, candidate_labels, batch_size=ZERO_SHOT_BATCH_SIZE) if comments else []
    if isinstance(outputs, dict):
        # A single input may come back as a bare dict rather than a list.
        outputs = [outputs]

    # 'labels' is sorted by descending score, so index 0 is the top prediction.
    preds = [output['labels'][0] for output in outputs]
    df[f'pred_{name}'] = preds


for name in models:
    acc = accuracy_score(df['label'], df[f'pred_{name}'])
    print(f"\n{name.upper()} Accuracy: {acc:.3f}")
    print(classification_report(df['label'], df[f'pred_{name}']))



Running zero-shot classification using distilbert...


Device set to use cuda:0
You seem to be using the pipelines sequentially on GPU. In order to maximize efficiency please use a dataset


Running zero-shot classification using bert...


Device set to use cuda:0


Running zero-shot classification using roberta...


Some weights of the model checkpoint at roberta-large-mnli were not used when initializing RobertaForSequenceClassification: ['roberta.pooler.dense.bias', 'roberta.pooler.dense.weight']
- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Device set to use cuda:0



DISTILBERT Accuracy: 0.266
              precision    recall  f1-score   support

     battery       0.75      0.55      0.63       117
      camera       0.38      0.71      0.49       123
      design       0.18      0.41      0.25       162
      others       0.97      0.07      0.12       927
 performance       0.09      0.87      0.17        62
       price       0.43      0.83      0.57        66

    accuracy                           0.27      1457
   macro avg       0.47      0.57      0.37      1457
weighted avg       0.75      0.27      0.23      1457


BERT Accuracy: 0.358
              precision    recall  f1-score   support

     battery       0.55      0.81      0.65       117
      camera       0.44      0.62      0.52       123
      design       0.17      0.13      0.15       162
      others       0.82      0.28      0.41       927
 performance       0.07      0.47      0.12        62
       price       0.17      0.67      0.27        66

    accuracy               